{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C123</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>886</th>\n",
       "      <td>887</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>Montvila, Rev. Juozas</td>\n",
       "      <td>male</td>\n",
       "      <td>27.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>211536</td>\n",
       "      <td>13.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>887</th>\n",
       "      <td>888</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Graham, Miss. Margaret Edith</td>\n",
       "      <td>female</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>112053</td>\n",
       "      <td>30.0000</td>\n",
       "      <td>B42</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>888</th>\n",
       "      <td>889</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Johnston, Miss. Catherine Helen \"Carrie\"</td>\n",
       "      <td>female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>W./C. 6607</td>\n",
       "      <td>23.4500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>889</th>\n",
       "      <td>890</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Behr, Mr. Karl Howell</td>\n",
       "      <td>male</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>111369</td>\n",
       "      <td>30.0000</td>\n",
       "      <td>C148</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890</th>\n",
       "      <td>891</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Dooley, Mr. Patrick</td>\n",
       "      <td>male</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>370376</td>\n",
       "      <td>7.7500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>891 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     PassengerId  Survived  Pclass  \\\n",
       "0              1         0       3   \n",
       "1              2         1       1   \n",
       "2              3         1       3   \n",
       "3              4         1       1   \n",
       "4              5         0       3   \n",
       "..           ...       ...     ...   \n",
       "886          887         0       2   \n",
       "887          888         1       1   \n",
       "888          889         0       3   \n",
       "889          890         1       1   \n",
       "890          891         0       3   \n",
       "\n",
       "                                                  Name     Sex   Age  SibSp  \\\n",
       "0                              Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                               Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                             Allen, Mr. William Henry    male  35.0      0   \n",
       "..                                                 ...     ...   ...    ...   \n",
       "886                              Montvila, Rev. Juozas    male  27.0      0   \n",
       "887                       Graham, Miss. Margaret Edith  female  19.0      0   \n",
       "888           Johnston, Miss. Catherine Helen \"Carrie\"  female   NaN      1   \n",
       "889                              Behr, Mr. Karl Howell    male  26.0      0   \n",
       "890                                Dooley, Mr. Patrick    male  32.0      0   \n",
       "\n",
       "     Parch            Ticket     Fare Cabin Embarked  \n",
       "0        0         A/5 21171   7.2500   NaN        S  \n",
       "1        0          PC 17599  71.2833   C85        C  \n",
       "2        0  STON/O2. 3101282   7.9250   NaN        S  \n",
       "3        0            113803  53.1000  C123        S  \n",
       "4        0            373450   8.0500   NaN        S  \n",
       "..     ...               ...      ...   ...      ...  \n",
       "886      0            211536  13.0000   NaN        S  \n",
       "887      0            112053  30.0000   B42        S  \n",
       "888      2        W./C. 6607  23.4500   NaN        S  \n",
       "889      0            111369  30.0000  C148        C  \n",
       "890      0            370376   7.7500   NaN        Q  \n",
       "\n",
       "[891 rows x 12 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#读取数据\n",
    "train_data=pd.read_csv('train.csv')\n",
    "test_data=pd.read_csv('test.csv')\n",
    "train_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>892</td>\n",
       "      <td>3</td>\n",
       "      <td>Kelly, Mr. James</td>\n",
       "      <td>male</td>\n",
       "      <td>34.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>330911</td>\n",
       "      <td>7.8292</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>893</td>\n",
       "      <td>3</td>\n",
       "      <td>Wilkes, Mrs. James (Ellen Needs)</td>\n",
       "      <td>female</td>\n",
       "      <td>47.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>363272</td>\n",
       "      <td>7.0000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>894</td>\n",
       "      <td>2</td>\n",
       "      <td>Myles, Mr. Thomas Francis</td>\n",
       "      <td>male</td>\n",
       "      <td>62.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>240276</td>\n",
       "      <td>9.6875</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>895</td>\n",
       "      <td>3</td>\n",
       "      <td>Wirz, Mr. Albert</td>\n",
       "      <td>male</td>\n",
       "      <td>27.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>315154</td>\n",
       "      <td>8.6625</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>896</td>\n",
       "      <td>3</td>\n",
       "      <td>Hirvonen, Mrs. Alexander (Helga E Lindqvist)</td>\n",
       "      <td>female</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3101298</td>\n",
       "      <td>12.2875</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>413</th>\n",
       "      <td>1305</td>\n",
       "      <td>3</td>\n",
       "      <td>Spector, Mr. Woolf</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>A.5. 3236</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>414</th>\n",
       "      <td>1306</td>\n",
       "      <td>1</td>\n",
       "      <td>Oliva y Ocana, Dona. Fermina</td>\n",
       "      <td>female</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17758</td>\n",
       "      <td>108.9000</td>\n",
       "      <td>C105</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>415</th>\n",
       "      <td>1307</td>\n",
       "      <td>3</td>\n",
       "      <td>Saether, Mr. Simon Sivertsen</td>\n",
       "      <td>male</td>\n",
       "      <td>38.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>SOTON/O.Q. 3101262</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>416</th>\n",
       "      <td>1308</td>\n",
       "      <td>3</td>\n",
       "      <td>Ware, Mr. Frederick</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>359309</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>417</th>\n",
       "      <td>1309</td>\n",
       "      <td>3</td>\n",
       "      <td>Peter, Master. Michael J</td>\n",
       "      <td>male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2668</td>\n",
       "      <td>22.3583</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>418 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     PassengerId  Pclass                                          Name  \\\n",
       "0            892       3                              Kelly, Mr. James   \n",
       "1            893       3              Wilkes, Mrs. James (Ellen Needs)   \n",
       "2            894       2                     Myles, Mr. Thomas Francis   \n",
       "3            895       3                              Wirz, Mr. Albert   \n",
       "4            896       3  Hirvonen, Mrs. Alexander (Helga E Lindqvist)   \n",
       "..           ...     ...                                           ...   \n",
       "413         1305       3                            Spector, Mr. Woolf   \n",
       "414         1306       1                  Oliva y Ocana, Dona. Fermina   \n",
       "415         1307       3                  Saether, Mr. Simon Sivertsen   \n",
       "416         1308       3                           Ware, Mr. Frederick   \n",
       "417         1309       3                      Peter, Master. Michael J   \n",
       "\n",
       "        Sex   Age  SibSp  Parch              Ticket      Fare Cabin Embarked  \n",
       "0      male  34.5      0      0              330911    7.8292   NaN        Q  \n",
       "1    female  47.0      1      0              363272    7.0000   NaN        S  \n",
       "2      male  62.0      0      0              240276    9.6875   NaN        Q  \n",
       "3      male  27.0      0      0              315154    8.6625   NaN        S  \n",
       "4    female  22.0      1      1             3101298   12.2875   NaN        S  \n",
       "..      ...   ...    ...    ...                 ...       ...   ...      ...  \n",
       "413    male   NaN      0      0           A.5. 3236    8.0500   NaN        S  \n",
       "414  female  39.0      0      0            PC 17758  108.9000  C105        C  \n",
       "415    male  38.5      0      0  SOTON/O.Q. 3101262    7.2500   NaN        S  \n",
       "416    male   NaN      0      0              359309    8.0500   NaN        S  \n",
       "417    male   NaN      1      1                2668   22.3583   NaN        C  \n",
       "\n",
       "[418 rows x 11 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 891 entries, 0 to 890\n",
      "Data columns (total 12 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   PassengerId  891 non-null    int64  \n",
      " 1   Survived     891 non-null    int64  \n",
      " 2   Pclass       891 non-null    int64  \n",
      " 3   Name         891 non-null    object \n",
      " 4   Sex          891 non-null    object \n",
      " 5   Age          714 non-null    float64\n",
      " 6   SibSp        891 non-null    int64  \n",
      " 7   Parch        891 non-null    int64  \n",
      " 8   Ticket       891 non-null    object \n",
      " 9   Fare         891 non-null    float64\n",
      " 10  Cabin        204 non-null    object \n",
      " 11  Embarked     889 non-null    object \n",
      "dtypes: float64(2), int64(5), object(5)\n",
      "memory usage: 83.7+ KB\n"
     ]
    }
   ],
   "source": [
    "#看一下训练集数据的信息\n",
    "train_data.info()#可以看到一共有891行数据 其中Age Cabin 缺失的比较多 Embarked缺失了两个"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 418 entries, 0 to 417\n",
      "Data columns (total 11 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   PassengerId  418 non-null    int64  \n",
      " 1   Pclass       418 non-null    int64  \n",
      " 2   Name         418 non-null    object \n",
      " 3   Sex          418 non-null    object \n",
      " 4   Age          332 non-null    float64\n",
      " 5   SibSp        418 non-null    int64  \n",
      " 6   Parch        418 non-null    int64  \n",
      " 7   Ticket       418 non-null    object \n",
      " 8   Fare         417 non-null    float64\n",
      " 9   Cabin        91 non-null     object \n",
      " 10  Embarked     418 non-null    object \n",
      "dtypes: float64(2), int64(4), object(5)\n",
      "memory usage: 36.0+ KB\n"
     ]
    }
   ],
   "source": [
    "#看一下测试集的信息\n",
    "test_data.info()#Age 缺失的比较多    Fare缺失的比较少 只缺失了1个  Cabin缺失的比较多"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Fare</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>891.000000</td>\n",
       "      <td>891.000000</td>\n",
       "      <td>891.000000</td>\n",
       "      <td>714.000000</td>\n",
       "      <td>891.000000</td>\n",
       "      <td>891.000000</td>\n",
       "      <td>891.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>446.000000</td>\n",
       "      <td>0.383838</td>\n",
       "      <td>2.308642</td>\n",
       "      <td>29.699118</td>\n",
       "      <td>0.523008</td>\n",
       "      <td>0.381594</td>\n",
       "      <td>32.204208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>257.353842</td>\n",
       "      <td>0.486592</td>\n",
       "      <td>0.836071</td>\n",
       "      <td>14.526497</td>\n",
       "      <td>1.102743</td>\n",
       "      <td>0.806057</td>\n",
       "      <td>49.693429</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.420000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>223.500000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>20.125000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7.910400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>446.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>28.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>14.454200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>668.500000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>38.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>31.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>891.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>512.329200</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       PassengerId    Survived      Pclass         Age       SibSp  \\\n",
       "count   891.000000  891.000000  891.000000  714.000000  891.000000   \n",
       "mean    446.000000    0.383838    2.308642   29.699118    0.523008   \n",
       "std     257.353842    0.486592    0.836071   14.526497    1.102743   \n",
       "min       1.000000    0.000000    1.000000    0.420000    0.000000   \n",
       "25%     223.500000    0.000000    2.000000   20.125000    0.000000   \n",
       "50%     446.000000    0.000000    3.000000   28.000000    0.000000   \n",
       "75%     668.500000    1.000000    3.000000   38.000000    1.000000   \n",
       "max     891.000000    1.000000    3.000000   80.000000    8.000000   \n",
       "\n",
       "            Parch        Fare  \n",
       "count  891.000000  891.000000  \n",
       "mean     0.381594   32.204208  \n",
       "std      0.806057   49.693429  \n",
       "min      0.000000    0.000000  \n",
       "25%      0.000000    7.910400  \n",
       "50%      0.000000   14.454200  \n",
       "75%      0.000000   31.000000  \n",
       "max      6.000000  512.329200  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看数据分布\n",
    "train_data.describe()#共7列 describe()默认只统计数值类型的列 非数值型的列没有显示出来"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>891</td>\n",
       "      <td>891</td>\n",
       "      <td>891</td>\n",
       "      <td>204</td>\n",
       "      <td>889</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>891</td>\n",
       "      <td>2</td>\n",
       "      <td>681</td>\n",
       "      <td>147</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>Hickman, Mr. Lewis</td>\n",
       "      <td>male</td>\n",
       "      <td>CA. 2343</td>\n",
       "      <td>B96 B98</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>1</td>\n",
       "      <td>577</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>644</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      Name   Sex    Ticket    Cabin Embarked\n",
       "count                  891   891       891      204      889\n",
       "unique                 891     2       681      147        3\n",
       "top     Hickman, Mr. Lewis  male  CA. 2343  B96 B98        S\n",
       "freq                     1   577         7        4      644"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data.describe(include=['O'])#对于非数值型的数据分布 describe要加参数:include=['O']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 填充缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "24.00    30\n",
       "22.00    27\n",
       "18.00    26\n",
       "19.00    25\n",
       "30.00    25\n",
       "         ..\n",
       "55.50     1\n",
       "70.50     1\n",
       "66.00     1\n",
       "23.50     1\n",
       "0.42      1\n",
       "Name: Age, Length: 88, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#填充缺失的数据  训练集有Age Cabin Embarked缺失    测试集有Age Fare Cabin缺失\n",
    "#先填充age 看看age的分布\n",
    "train_data['Age'].value_counts()#这样还不是很明显  最好做一下图表可视化  看看是怎么分布的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEWCAYAAABhffzLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVfklEQVR4nO3de5Bk5X3e8e/DRTcQN7FFLZfRSgaBiS0WssFQEFtGFwN2LKmiqCAqgR2slRMRwCZlIykli7IrwQkIOSVF8mIIxJKxLlxEsCJBMLFLREICxGVhhblKgBZWGDAX2bIXfvmjz5jW7OzOzDKnu3fe76eqa06fc7rPr7vPPPPO22+/napCktSO7cZdgCRptAx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPxa0pJ8KMkfjfH470lyzSLf54oklWSHxbxftSOO45dml+Ri4OGq+o/jrmVYkhXAA8COVbVxjn3fBHymqvbtvzJtK2zxS1vJFre2VQa/xiLJWUnuS/JMkruSvHNo2/ZJzkvyeJIHkpw63LWRZNckFyZZn+SRJL+XZPvNHOejST7TLU93kZyc5Hvd/X94M7dbDbwH+K0kzyb5X936B5P8dpLbgeeS7DDHY/mVJF8bul5Jfj3JPUmeSvLJJJnjudo+ybldvfcDvzhj+68mWdcd//4k7+/W7wT8b2Dv7jE8m2TvJIcn+Xp3/PVJPpHkZVuqQUuLwa9xuQ/458CuwNnAZ5Is77a9DzgOWAkcBrxjxm0vBjYC+wOHAm8Dfm0Bxz4aOBB4M/CRJD85c4eqWgN8FvgvVbVzVf2Loc0nMgjf3bquli09ltn8EvDPgDcC7wZ+YY5639fd5lBgFfCuGds3dNt3AX4VOD/JYVX1HIPn8fvdY9i5qr4PPA/8BrAncGT3PPy7OWrQEmLwayyq6gtV9f2qeqGqPgfcAxzebX438AdV9XBVPQmcM327JHsBxwNnVNVzVbUBOB84YQGHP7uq/raqbgNuAw5ZYPn/raoeqqq/ncdjmc05VfVUVX0PuJ7BH7gteTfw8e6YTwD/eXhjVf1ZVd1XA38BXMPgD9GsqurmqvpGVW2sqgeBPwR+bo4atITYR6mxSHIS8JvAim7VzgxaoAB7Aw8N7T68/FpgR2D9UA/JdjP2mcujQ8s/7I69ED92rDkey2Icf+bz8d0Zxz8O+B3gDQyei1cBd2zuzpK8AfgYg/8eXsUgB26eowYtIbb4NXJJXgtcAJwKvKaqdgPWAtNJvh4YHoWy39DyQ8CPgD2rarfusktV/ZMeSt3ckLd/XD+Px7IY1vPjz8HU0PFfDlwGnAvs1R3/y0PHn+0xfAr4DnBAVe0CfGiR69WEM/g1DjsxCKQfwODNSeCnhrZ/Hjg9yT5JdgN+e3pDVa1n0JVxXpJdkmyX5CeS9NFV8Rjw+jn2meuxLIbPA6cl2TfJ7sBZQ9teBry8O/7GrvX/tqHtjwGvSbLr0LpXA08DzyY5CPi3i1yvJpzBr5GrqruA84CvMwimnwZuGNrlAgbhfjvwbQYt2I0M3pQEOIlB4N0FPAl8EdjSm6lb60Lg4G70y5Wz7TCPx7IYLgC+yuD9iFuAy4eO/wxwGoM/Dk8C/xq4amj7d4BLgfu7x7E38B+6/Z7p7vtzi1yvJpwf4NLE61qxn66q1467FmkpsMWviZPklUmO78bI78Pgjcsrxl2XtFQY/JpEYTAe/kkGXT3rgI+MtaKeJfn00Ieshi+fHndtWnrs6pGkxtjil6TGbBMf4Npzzz1rxYoV4y5DkrYpN9988+NVtWzm+m0i+FesWMFNN9007jIkaZuS5LuzrberR5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDWmt+BP8ook30xyW5I7k5zdrX9dkhuT3Jvkc37XpySNVp8t/h8Bx1TVIQy+Wu7YJEcAvw+cX1X7M5iL5ZQea5AkzdBb8Hff//lsd3XH7lLAMQzmTwe4hE2/
SFuS1KNeP7mbZHsG3+W5P/BJ4D7gqara2O3yMLDPZm67GlgNMDU1NdsumhArzvqzf1x+8JxfHGMlkuaj1zd3q+r5qlrJ4PtTDwcOWsBt11TVqqpatWzZJlNNSJK20khG9VTVU8D1wJHAbkmm/9PYF3hkFDVIkgb6HNWzrPuibJK8Engrgy/UuB54V7fbycCX+qpBkrSpPvv4lwOXdP382wGfr6qrk9wF/GmS32Pw7UoX9liDJGmG3oK/qm4HDp1l/f0M+vslSWPgJ3clqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmN6/c5dLR1+r660dNjil6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY1xOGdDHJIpCWzxS1JzDH5JaozBL0mN6S34k+yX5PokdyW5M8np3fqPJnkkya3d5fi+apAkbarPN3c3AmdW1S1JXg3cnOTabtv5VXVuj8eWJG1Gb8FfVeuB9d3yM0nWAfv0dTxJ0vyMZDhnkhXAocCNwFHAqUlOAm5i8F/Bk7PcZjWwGmBqamoUZTZnUoZ3bqmOSalRWkp6f3M3yc7AZcAZVfU08CngJ4CVDP4jOG+221XVmqpaVVWrli1b1neZktSMXoM/yY4MQv+zVXU5QFU9VlXPV9ULwAXA4X3WIEn6cX2O6glwIbCuqj42tH750G7vBNb2VYMkaVN99vEfBbwXuCPJrd26DwEnJlkJFPAg8P4ea5AkzdDnqJ6vAZll05f7OqYkaW5+cleSGuPsnJqVwyilpcsWvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwzm16BwKKk02W/yS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQ7nVLMcdqpW2eKXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1Jjegj/JfkmuT3JXkjuTnN6t3yPJtUnu6X7u3lcNkqRN9dni3wicWVUHA0cAH0hyMHAWcF1VHQBc112XJI1Ib8FfVeur6pZu+RlgHbAP8Hbgkm63S4B39FWDJGlTI5mdM8kK4FDgRmCvqlrfbXoU2Gszt1kNrAaYmpoaQZWaBM6YKfWv9zd3k+wMXAacUVVPD2+rqgJqtttV1ZqqWlVVq5YtW9Z3mZLUjF6DP8mODEL/s1V1ebf6sSTLu+3LgQ191iBJ+nF9juoJcCGwrqo+NrTpKuDkbvlk4Et91SBJ2lSfffxHAe8F7khya7fuQ8A5wOeTnAJ8F3h3jzVIkmboLfir6mtANrP5zX0dV5K0ZX5yV5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSYkczOqbbNnHFzMWfgXMh9OfOnNGCLX5IaY/BLUmMMfklqjMEvSY0x+CWpMY7qWUJaGLXSwmOU+jav4E+yG3ASsGL4NlV1Wi9VSZJ6M98W/5eBbwB3AC/0V44kqW/zDf5XVNVv9lqJJGkk5vvm7h8neV+S5Un2mL70WpkkqRfzbfH/PfBfgQ8D1a0r4PV9FCVJ6s98g/9MYP+qerzPYiRJ/ZtvV8+9wA/7LESSNBrzbfE/B9ya5HrgR9MrHc4pSdue+Qb/ld1FkrSNm1fwV9UlfRciSRqNefXxJ3kgyf0zL3Pc5qIkG5KsHVr30SSPJLm1uxz/Uh+AJGlh5tvVs2po+RXAvwLmGsd/MfAJ4H/OWH9+VZ07z+NKkhbZvFr8VfXXQ5dHqurjwBZnyKqqvwSeWIQaJUmLaL6TtB02dHU7Bv8BbO3MnqcmOQm4CTizqp7czDFXA6sBpqamtvJQmq+Fzno5vP8k2VxdzuQpvWi+4X0eL35idyPwIIPunoX6FPC73X39bne//2a2HatqDbAGYNWqVTXbPpKkhZvvB7iOAy4ErgNuAB4BTljowarqsap6vqpeAC4ADl/ofUiSXpqFjON/CrgF+LutPViS
5VW1vrv6TmDtlvaXJC2++Qb/vlV17ELuOMmlwJuAPZM8DPwO8KYkKxl09TwIvH8h9ylJeunmG/z/L8lPV9Ud873jqjpxltUXzvf2kqR+zDf4jwZ+JckDDObqCVBV9cbeKpMk9WK+wX9cr1VIE84veddSMt+5er7bdyGSpNGY73BOSdISYfBLUmMMfklqjMEvSY0x+CWpMVs7w6a2AQ5B7M9cz63PvSaZLX5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGIdzSmPm0E+Nmi1+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiHc0qzcIilljJb/JLUGINfkhpj8EtSY3oL/iQXJdmQZO3Quj2SXJvknu7n7n0dX5I0uz5b/BcDx85YdxZwXVUdAFzXXZckjVBvwV9Vfwk8MWP124FLuuVLgHf0dXxJ0uxGPZxzr6pa3y0/Cuy1uR2TrAZWA0xNTS16IX5ZtpYiz1vNx9je3K2qAmoL29dU1aqqWrVs2bIRViZJS9uog/+xJMsBup8bRnx8SWreqIP/KuDkbvlk4EsjPr4kNa/P4ZyXAl8HDkzycJJTgHOAtya5B3hLd12SNEK9vblbVSduZtOb+zqmJGlufnJXkhrj7JzSiC10yOXw/sMcrqmtZYtfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbhnCPgjInaFnietsMWvyQ1xuCXpMYY/JLUGINfkhpj8EtSY5oZ1eOIBbXI816zscUvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGtPMcM65bCvD3raVOrdFm/tu24XedpSvy0KOO/Pxef60yxa/JDXG4Jekxhj8ktSYsfTxJ3kQeAZ4HthYVavGUYcktWicb+7+fFU9PsbjS1KT7OqRpMaMq8VfwDVJCvjDqlozc4ckq4HVAFNTUyMub9vg0M5tQwuvUwuPcSkZV4v/6Ko6DDgO+ECSn525Q1WtqapVVbVq2bJlo69QkpaosQR/VT3S/dwAXAEcPo46JKlFIw/+JDslefX0MvA2YO2o65CkVo2jj38v4Iok08f/k6r6yhjqkKQmjTz4q+p+4JBRH1eSNOBwTklqjLNzztPWzoK4mPtK4/RSztUt3dbfgdGzxS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5Ia43DOrTBz+JnD0bSYWjiftsUvtl9KbPFLUmMMfklqjMEvSY0x+CWpMQa/JDXG4JekxjQxnPOlDB1bbAsdiubQtRfN9TqO63VezONuK+fqXOflYs7k2efvwOae76X+u2aLX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDVmyQ/nnKThcfMxCcM3F2vmxEkyqUNBx2kh59qkPj8z6xrl78yohpn28Zhs8UtSYwx+SWqMwS9JjRlL8Cc5NsndSe5NctY4apCkVo08+JNsD3wSOA44GDgxycGjrkOSWjWOFv/hwL1VdX9V/T3wp8Dbx1CHJDUpVTXaAybvAo6tql/rrr8X+JmqOnXGfquB1d3VA4G7t/KQewKPb+Vt+2RdC2Nd8zeJNYF1LdRi1PXaqlo2c+XEjuOvqjXAmpd6P0luqqpVi1DSorKuhbGu+ZvEmsC6FqrPusbR1fMIsN/Q9X27dZKkERhH8H8LOCDJ65K8DDgBuGoMdUhSk0be1VNVG5OcCnwV2B64qKru7PGQL7m7qCfWtTDWNX+TWBNY10L1VtfI39yVJI2Xn9yVpMYY/JLUmCUd/JMyNUSSi5JsSLJ2aN0eSa5Nck/3c/cR17RfkuuT3JXkziSnT0hdr0jyzSS3dXWd3a1/XZIbu9fyc93AgJFLsn2Sbye5elLqSvJgkjuS3Jrkpm7dWF/HrobdknwxyXeSrEty5LjrSnJg9zxNX55OcsYE1PUb3fm+Nsml3e9Bb+fWkg3+CZsa4mLg2BnrzgKuq6oDgOu666O0ETizqg4GjgA+0D0/467rR8AxVXUIsBI4NskRwO8D51fV/sCTwCkjrmva
6cC6oeuTUtfPV9XKoXHf434dAf4A+EpVHQQcwuB5G2tdVXV39zytBP4p8EPginHWlWQf4DRgVVX9FINBLyfQ57lVVUvyAhwJfHXo+geBD46xnhXA2qHrdwPLu+XlwN1jfr6+BLx1kuoCXgXcAvwMg08w7jDbazvCevZlEArHAFcDmZC6HgT2nLFurK8jsCvwAN0Akkmpa0YtbwNuGHddwD7AQ8AeDEZaXg38Qp/n1pJt8fPikznt4W7dpNirqtZ3y48Ce42rkCQrgEOBGyehrq475VZgA3AtcB/wVFVt7HYZ12v5ceC3gBe666+ZkLoKuCbJzd1UJzD+1/F1wA+A/9F1jf1Rkp0moK5hJwCXdstjq6uqHgHOBb4HrAf+BriZHs+tpRz824wa/Ekfy7jaJDsDlwFnVNXTk1BXVT1fg3/F92Uwqd9Bo65hpiS/BGyoqpvHXcssjq6qwxh0a34gyc8ObxzT67gDcBjwqao6FHiOGd0nYz7vXwb8MvCFmdtGXVf3fsLbGfyx3BvYiU27hhfVUg7+SZ8a4rEkywG6nxtGXUCSHRmE/mer6vJJqWtaVT0FXM/g39zdkkx/4HAcr+VRwC8neZDBjLLHMOjDHndd0y1GqmoDg/7qwxn/6/gw8HBV3dhd/yKDPwTjrmvaccAtVfVYd32cdb0FeKCqflBV/wBczuB86+3cWsrBP+lTQ1wFnNwtn8ygj31kkgS4EFhXVR+boLqWJdmtW34lg/cd1jH4A/CucdVVVR+sqn2ragWDc+nPq+o9464ryU5JXj29zKDfei1jfh2r6lHgoSQHdqveDNw17rqGnMiL3Tww3rq+BxyR5FXd7+X0c9XfuTWuN1ZG9KbJ8cBfMegj/vAY67iUQd/dPzBoCZ3CoH/4OuAe4P8Ae4y4pqMZ/Dt7O3Brdzl+Aup6I/Dtrq61wEe69a8Hvgncy+Df85eP8fV8E3D1JNTVHf+27nLn9Hk+7texq2ElcFP3Wl4J7D4hde0E/DWw69C6cZ/3ZwPf6c75PwZe3ue55ZQNktSYpdzVI0mahcEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS3NIcmU3Adqd05OgJTklyV913x1wQZJPdOuXJbksybe6y1HjrV7alB/gkuaQZI+qeqKbQuJbDKbMvYHB3DPPAH8O3FZVpyb5E+C/V9XXkkwxmEr3J8dWvDSLHebeRWreaUne2S3vB7wX+IuqegIgyReAN3Tb3wIcPJhyBYBdkuxcVc+OsmBpSwx+aQuSvIlBmB9ZVT9M8n8ZzKmyuVb8dsARVfV3IylQ2gr28UtbtivwZBf6BzH4msqdgJ9Lsns3be6/HNr/GuDfT19JsnKUxUrzYfBLW/YVYIck64BzgG8wmBf9PzGYOfEGBl99+Dfd/qcBq5LcnuQu4NdHXrE0B9/clbbCdL991+K/Arioqq4Yd13SfNjil7bOR7vvBV7L4EvFrxxrNdIC2OKXpMbY4pekxhj8ktQYg1+SGmPwS1JjDH5Jasz/Bx6TDYUogGmeAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "plt.bar(train_data['Age'].value_counts().index,train_data['Age'].value_counts().values)#横轴是年龄的值，纵轴是年龄的数量\n",
    "plt.title(\"age in train_data\")\n",
    "plt.xlabel(\"age\")\n",
    "plt.ylabel(\"num\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n",
      "  FutureWarning\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEWCAYAAABhffzLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZs0lEQVR4nO3de5hcVZnv8e+PEBW5BUwTAyFEBFG8AdPD4IFRRhzIbQghAUFHIqABJQKCj8PI6OjgM4dBAoggEAgSBOSSC8Fwl4MyqKAJBggEDQlhSAhJGOAAOuIJvOePvbu60qnqru6uXVXp9fs8Tz29au+19nq7qvrNyt6r1lZEYGZm6dii2QGYmVljOfGbmSXGid/MLDFO/GZmiXHiNzNLjBO/mVlinPhtQJP0DUlXNbH/z0q6p87HHCUpJG1Zz+NaOuR5/GaVSboGWBUR/9LsWMpJGgU8AwyOiA091D0YuC4iRhQfmW0uPOI36yOPuG1z5cRvTSHpLEnLJb0m6UlJE8v2DZI0XdKLkp6RNK381Iak7SXNlLRG0mpJ35U0qEo/35Z0XV7uOEUyRdJ/5cc/u0q7qcBnga9Lel3ST/PtKyX9k6THgD9K2rKH3+Xzkh4sex6STpa0TNIrki6VpB5eq0GSzs/jXQGM67L/eElL8/5XSDop3741cCewc/47vC5pZ0n7S/p13v8aSZdIelt3MdjA4sRvzbIc+Ftge+A7wHWShuf7vgiMAfYB9gOO6NL2GmADsAewL3Ao8IVe9H0QsBdwCPAtSR/oWiEiZgDXA+dFxDYR8Q9lu48lS75D8lMt3f0ulYwH/hr4CHA0cFgP8X4xb7Mv0A5M7rJ/Xb5/O+B44EJJ+0XEH8lex+fz32GbiHgeeBP4KjAU+Fj+Ony5hxhsAHHit6aIiFsi4vmIeCsibgKWAfvnu48Gvh8RqyLiZeDcjnaShgFjgdMj4o8RsQ64EDimF91/JyL+JyIeBR4FPtrL8C+OiOci4n9q+F0qOTciXomI/wLuJ/sHrjtHAxflfb4E/O/ynRFxe0Qsj8wvgHvI/iGqKCIWRcRDEbEhIlYCVwCf6CEGG0B8jtKaQtJxwBnAqHzTNmQjUICdgefKqpeXdwMGA2vKzpBs0aVOT14oK/8p77s3Nuqrh9+lHv13fT2e7dL/GOBfgfeRvRbvBB6vdjBJ7wMuIPvfwzvJ8sCiHmKwAcQjfms4SbsBVwLTgHdFxBBgCdCRydcA5bNQdi0rPwe8AQyNiCH5Y7uI+GABoVab8lbaXsPvUg9r2Pg1GFnW/9uBOcD5wLC8/zvK+q/0O1wGPAXsGRHbAd+oc7zW4pz4rRm2JktI6yG7OAl8qGz/zcBpknaRNAT4p44dEbGG7FTGdEnbSdpC0nslFXGqYi2wew91evpd6uFm4FRJIyTtAJxVtu9twNvz/jfko/9Dy/avBd4lafuybdsCrwKvS3o/8KU6x2stzonfGi4ingSmA78mS0wfBn5ZVuVKsuT+GPA7shHsBrKLkgDHkSW8J4GXgdlAdxdT+2omsHc+++XWShVq+F3q4UrgbrLrEY8Ac8v6fw04lewfh5eBzwC3le1/CvgJsCL/PXYGvpbXey0/9k11jtdanL/AZS0vH8VeHhG7NTsWs4HAI35rOZK2kjQ2nyO/C9mFy3nNjstsoHDit1YksvnwL5Od6lkKfKupERVM0uVlX7Iqf1ze7Nhs4PGpHjOzxHjEb2aWmM3iC1xDhw6NUaNGNTsMM7PNyqJFi16MiLau2zeLxD9q1CgWLlzY7DDMzDYrkp6ttN2neszMEuPEb2aWGCd+M7PEOPGbmSXGid/MLDFO/GZmiSks8Ut6h6TfSHpU0hOSvpNvf4+khyU9Lekm3+vTzKyxihzxvwF8MiI+SnZrudGSDgD+A7gwIvYgW4vlxAJjMDOzLgpL/Pn9P1/Pnw7OHwF8kmz9dIBZbHojbTMzK1Ch39yVNIjsXp57AJcC
y4FXImJDXmUVsEuVtlOBqQAjR46sVMUGgDHzP10q3znB9wMxa4RCL+5GxJsRsQ/Z/VP3B97fi7YzIqI9Itrb2jZZasLMzPqoIbN6IuIV4H7gY8AQSR3/0xgBrG5EDGZmlilyVk9bfqNsJG0F/D3ZDTXuBybn1aYA84uKwczMNlXkOf7hwKz8PP8WwM0RsUDSk8CNkr5LdnelmQXGYGZmXRSW+CPiMWDfCttXkJ3vNzOzJvA3d83MEuPEb2aWGCd+M7PEOPGbmSXGid/MLDFO/GZmiXHiNzNLjBO/mVlinPjNzBLjxG9mlhgnfjOzxDjxm5klxonfzCwxTvxmZokp9J67lq4z5owulS+YdFcTIzGzrjziNzNLjBO/mVlinPjNzBLjxG9mlhgnfjOzxDjxm5klxtM5bROXXH9YqTzts3c3MRIzK4JH/GZmiXHiNzNLjBO/mVliCkv8knaVdL+kJyU9Iem0fPu3Ja2WtDh/jC0qBjMz21SRF3c3AGdGxCOStgUWSbo333dhRJxfYN9mZlZFYYk/ItYAa/Lya5KWArsU1Z+ZmdWmIdM5JY0C9gUeBg4Epkk6DlhI9r+Clyu0mQpMBRg5cmQjwrQqLrwhm9751c+05tTOsfPOLZXvmHhW1Xrj5l5aKt9+5CmFxmTWygq/uCtpG2AOcHpEvApcBrwX2IfsfwTTK7WLiBkR0R4R7W1tbUWHaWaWjEITv6TBZEn/+oiYCxARayPizYh4C7gS2L/IGMzMbGNFzuoRMBNYGhEXlG0fXlZtIrCkqBjMzGxTRZ7jPxD4HPC4pMX5tm8Ax0raBwhgJXBSgTGYmVkXRc7qeRBQhV13FNWnmZn1zN/cNTNLjBO/1c3Zt4zm7FtG91zRzJrKid/MLDFO/GZmiXHiNzNLjBO/mVlinPjNzBLjxG9mlhjfbN1a1thbzyyV7zii4lp+ZtYHHvGbmSXGid/MLDFO/GZmiXHiNzNLjBO/mVlinPjNzBLj6ZxmBTt89vxS+bbJE5oYiVnGI34zs8Q48ZuZJcaJ38wsMU78ZmaJceI3M0uME7+ZWWKc+M3MEuPEb2aWGCd+M7PEOPGbmSWmsMQvaVdJ90t6UtITkk7Lt+8o6V5Jy/KfOxQVg5mZbarIEf8G4MyI2Bs4ADhF0t7AWcB9EbEncF/+3MzMGqSwxB8RayLikbz8GrAU2AWYAMzKq80CjigqBjMz21RDzvFLGgXsCzwMDIuINfmuF4BhVdpMlbRQ0sL169c3IkwbgMbN/T7j5n6/2WGYtZTCE7+kbYA5wOkR8Wr5vogIICq1i4gZEdEeEe1tbW1Fh2lmloxCE7+kwWRJ//qImJtvXitpeL5/OLCuyBjMzGxjRc7qETATWBoRF5Ttug2YkpenAPO7tjUzs+IUeQeuA4HPAY9LWpxv+wZwLnCzpBOBZ4GjC4zBzMy6KCzxR8SDgKrsPqSofs3MrHv+5q6ZWWKc+M3MEuPEb2aWGCd+M7PEOPGbmSXGid/MLDFO/GZmiXHiNzNLTJHf3DWrm7G3/kupfMcR32XsvH/LyhO/Vfe+xs/5Uam8YNLxfTrGP8zOlqb66eQj6xKTWT15xG9mlhgnfjOzxDjxm5klxonfzCwxTvxmZolx4k/YzGsPZea1hzY7jJYwbs5VjJtzVbPDMGuImqZzShoCHAeMKm8TEacWEpWZmRWm1nn8dwAPAY8DbxUXjpmZFa3WxP+OiDij0EjMzKwhaj3H/2NJX5Q0XNKOHY9CIzMzs0LUOuL/C/A94Gwg8m0B7F5EUGZmVpxaE/+ZwB4R8WKRwZiZWfFqPdXzNPCnIgMxM7PGqHXE/0dgsaT7gTc6Nno6p5nZ5qfWxH9r/jAzs81cTYk/ImYVHYiZmTVGTef4JT0jaUXXRw9trpa0TtKSsm3flrRa0uL8Mba/v4CZmfVOrad62svK7wCOAnqax38NcAlwbZftF0bE+TX2a2ZmdVbTiD8i/rvssToiLgLG9dDmAeClOsRo
ZmZ1VOsibfuVPd2C7H8Afb1f7zRJxwELgTMj4uUqfU4FpgKMHDmyj11ZvZ1342Gl8tePubumNlPnjS6VZ0y8q+4xNdL42dcDsGDyZxk/+8bS9gWTj2lWSGa9Vmvynk7nN3Y3ACvJTvf01mXAOfmxzsmPe0KlihExA5gB0N7eHpXqmJlZ79X6Ba4xwEzgPuCXwGqg10OciFgbEW9GxFvAlcD+vT2GmZn1T2/m8b8CPAL8ua+dSRoeEWvypxOBJd3VNzOz+qs18Y+IiNE9V+sk6SfAwcBQSauAfwUOlrQP2amelcBJvTmmmZn1X62J/1eSPhwRj9d64Ig4tsLmmbW2NzOzYtSa+A8CPi/pGbK1egRERHyksMjMzKwQtSb+MYVGYWYbmTjn56XyvEkHNy0OG5hqXavn2aIDMTOzxqh1OqeZmQ0QTvxmZolx4jczS4wTv5lZYpz4zcwS09cVNm2AueLHnatuejjQeo6c8yAAcycd1G29o+Z0roJyy6QPFRqTbb78J25mlhgnfjOzxDjxm5klxonfzCwxTvxmZolx4jczS4ync5ol6GvzVpXK508c0cRIrBk84jczS4wTv5lZYpz4zcwS48RvZpYYJ34zs8Q48ZuZJcbTOc1axBGzf1YqS/7TtOJ4xG9mlhgnfjOzxDjxm5klprDEL+lqSeskLSnbtqOkeyUty3/uUFT/ZmZWWZEj/muA0V22nQXcFxF7Avflz83MrIEKS/wR8QDwUpfNE4BZeXkWcERR/ZuZWWWNnjM2LCLW5OUXgGHVKkqaCkwFGDlyZANCq83zl55eKu98ykUb7Vv+gwml8nu/Mr9BEZk13/xbXiyVJxw1tImRWC2adnE3IgKIbvbPiIj2iGhva2trYGRmZgNboxP/WknDAfKf6xrcv5lZ8hqd+G8DpuTlKYDPh5iZNViR0zl/Avwa2EvSKkknAucCfy9pGfCp/LmZmTVQYRd3I+LYKrsOKapPMzPrmb+5a2aWGC8BaJaIKXOfBWDWkbvV3OaKuZ3zL7ZCpfJxR3qm3ebMI34zs8Q48ZuZJcaJ38wsMU78ZmaJceI3M0uME7+ZWWI8nbPF/OLKcaXyJ754exMjMSvWQ7PWl8oHTPH00EbyiN/MLDFO/GZmiXHiNzNLjBO/mVlinPjNzBLjWT01WPPDbzY7BLPNxp03dd5/d8ynff/dVuQRv5lZYpz4zcwS48RvZpYYJ34zs8Q48ZuZJcaJ38wsMZ7O2U/P/eAfAdj1K9c1OZJOt/xoNABHHX9XkyOxnkyYnb1H8yeP7lP7yXMeKZVnT9qvLjHV6uY52bTNoyd1P2XzvhuyxdgO+YwXYmsVHvGbmSXGid/MLDFO/GZmiWnKOX5JK4HXgDeBDRHR3ow4zMxS1MyLu38XES/2XM3MzOrJp3rMzBLTrBF/APdICuCKiJjRtYKkqcBUgJEjRzY4vIFp1jWHlspTPn9PEyOxRvj0nGWl8k2T9mxiJMVZNf2FUnnEme9uYiSbl2aN+A+KiP2AMcApkj7etUJEzIiI9ohob2vz/F8zs3ppSuKPiNX5z3XAPGD/ZsRhZpaihid+SVtL2rajDBwKLGl0HGZmqWrGOf5hwDxJHf3fEBFeW8DMrEEanvgjYgXw0Ub3a2ZmGU/nNDNLjFfnLMjSSw8vlT9wym3d1v3VjPEA/K+pC7qtd9fMsQCMPvGOfkZntvl46odrS+X3f3lYze1eOH8FAO/+2u4bb7/g8VL53Wd8uJ/RbZ484jczS4wTv5lZYpz4zcwS48RvZpYYJ34zs8Q48ZuZJcbTORvk0cs6p3f+ZYsolf/6pJ82Ixyzqi6cl614+dWJA2e1yxemP9X5RLW3W/v9XwIw7LQD6xxRc3nEb2aWGCd+M7PEOPGbmSXGid/MLDFO/GZmiXHiNzNLTNLTOddffkWpHPy5VN7p5NOaEc4m7rtqXKl8yBdur1pv3tVjSuWJJ9xZaEytbtzc6aXy7UeeWb3enM73/vZJJ1Wt
N372taXygsnH9TO6+pg05+FSWQzu9/HOmfd8qfzNiTv3+3h9tWjmOgD+6sSduq234uJsuunup2483XTNeatK5eFfH1Fzv2sveqTs2YZSadjp1e8Iu+4HPyuVd/rKpzbed2nnKrs7nTK+5jgaySN+M7PEOPGbmSXGid/MLDFO/GZmiXHiNzNLjBO/mVliBvx0zvWXXVcqt33pH1l/2Y/y8vHdtlt72bkADPvSWcUF1w8LyqZwdueGaw4rLIZzbuo89jc/fXfN7SbPHw3A7Al3bbR9zPyTS+U7J1zez+gaa/zsW0rlBZOPqlrv8Nmd03Jvmzyuar2B6oEfry+V39ZNvcdmrKupXiOtvfgXpbK6rPC57pLss7zTtNEbb790Xqm80ykTWffDm7Pyl48uKMraeMRvZpYYJ34zs8Q48ZuZJaYpiV/SaEm/l/S0pNY8iW5mNkA1PPFLGgRcCowB9gaOlbR3o+MwM0tVM0b8+wNPR8SKiPgLcCMwoQlxmJklSRHRc616dihNBkZHxBfy558D/iYipnWpNxWYmj/9ELCkoYFuaijwYpNjgNaIoxVigNaIoxVigNaIoxVigNaIoxViANgrIrbturFl5/FHxAxgBoCkhRHR3sx4WiGGVomjFWJolThaIYZWiaMVYmiVOFohho44Km1vxqme1cCuZc9H5NvMzKwBmpH4fwvsKek9kt4GHAPc1oQ4zMyS1PBTPRGxQdI04G5gEHB1RDzRQ7MZxUfWo1aIAVojjlaIAVojjlaIAVojjlaIAVojjlaIAarE0fCLu2Zm1lz+5q6ZWWKc+M3MEuPE34WkqyWtk1TxewPKXJwvN/GYpP2K6lfSUZKekPSWpKpTw+q1BIakXSXdL+nJvN/T8u07SrpX0rL85w5V2k/J6yyTNKXOMdwkaXH+WClpcZX29Xot3iHpN5IezeP4Tpf9F0t6vZv2/5zH8HtJ/VobW9IgSb+TtCB/Pi0/dkga2k27fr8fZcdaKenx/PVfmG9r2Oeimxg+KunX+fafStquStt6fS72KvscLpb0qqTTG/232m8R4UfZA/g4sB+wpMr+scCdgIADgIeL6hf4ALAX8HOgvUq7QcByYHeypcsfBfbuYwzDgf3y8rbAH8iW1TgPOCvffhbwHxXa7gisyH/ukJd3qFcMXepMB75V8GshYJu8PBh4GDggf94O/Bh4vUrbvfO+3w68J49pUD8+G2cANwAL8uf7AqOAlcDQKm3q8n6UHW+Tvhr5uegmht8Cn8jLJwDnFPm5qHDcF4DdGv232t+HR/xdRMQDwEvdVJkAXBuZh4AhkoYX0W9ELI2I3/fQtG5LYETEmoh4JC+/BiwFdsmPNyuvNgs4okLzw4B7I+KliHgZuBcYXaFeX2MAsv9xAUcDP6nQvJ6vRUREx4h+cP4IZWtNfQ/4ejfNJwA3RsQbEfEM8HQeW69JGgGMA64qi+13EbGyh6Z1eT960LDPRTfeBzyQl+8FJlWoU9QyMYcAyyPi2Ub/rfaXE3/v7QI8V/Z8FWWJqQkKiUfSKLKR5cPAsIhYk+96ARjWiDi6xNDhb4G1EbGs6BjyUyyLgXVkyethYBpwW9nrUUk947iI7B+Zt3rZrt7vRwD3SFqkbDkVaPznolIMT9CZPI9i4y+HFhFDuWOoPACppmVyR8su2WDNI2kbYA5wekS8qrL7zEVESCp8DnDXGMp2HUvv/tj6LCLeBPaRNASYJ+njZMnl4Eb0L2k8sC4iFklqSJ/dOCgiVkvaCbhX0lPlOxv0uagUwwnAxZK+SfZF0L8UHAMAyr58ejjwz43or9484u+9Vltyoq7xSBpMlnCvj4i5+ea1Haez8p/rKjStWxxVYkDSlsCRwE1Vmhby3kTEK8D9wN8BewBPS1oJvFPS0wXGcSBweN7XjcAnJV3XfZO6xwBARKzOf64D5pGdtmjo56JSDBHxVEQcGhF/RTYgWF5kDGXGAI9ExNpetGmd3NGMCwut/iC7cFbt4u44
Nr64+5ui+6X7C0Zbkl0wew+dF4w+2Mf+BVwLXNRl+/fY+CLeeRXa7gg8Q3YBb4e8vGO9Ysj3jQZ+0U3ber4WbcCQvLwV8J/A+C51ql3c/SAbX9xdQT8u7ubHPJj84m7ZtpV0f3G33+9HfqytgW3Lyr/K34tGfi6qxbBTvm2L/HNzQpGfi7Jj3ggcX2F7Q/5W+/toeIet/iAbNawB/h/ZObgTgZOBk/P9IruRzHLg8Wpvcp36nZiX3wDWAnfndXcG7ihrO5Zs9sty4Ox+xHAQ2XnUx4DF+WMs8C7gPmAZ8LOOP1yy2S1XlbU/gexC5tOV/ij6E0O+75qO96GsflGvxUeA3+VxLKHyLKLXy8qHA/9W9vzsPIbfA2Pq8Pk4mM5ZPafmn4sNwPMd70ER70d+nN3zJPUo2Tn1s/PtjfxcVIvhtPz9/gNwLp2rERTyuciPtTXw38D2Zdsa+rfa34eXbDAzS4zP8ZuZJcaJ38wsMU78ZmaJceI3M0uME7+ZWWKc+M3MEuPEb2aWGCd+sx5IujVfGOyJjsXBJJ0o6Q/5mv1XSrok394maY6k3+aPA5sbvdmm/AUusx5I2jEiXpK0Fdn674cBvyS7f8JrwP8BHo2IaZJuAH4YEQ9KGkn2Dc4PNC14swq8OqdZz06VNDEv7wp8jmzNoJcAJN1Cti48wKeAvctWNN1O0jbRuba/WdM58Zt1I18O+VPAxyLiT5J+DjxFdselSrYgu1PXnxsSoFkf+By/Wfe2B17Ok/77yVZk3Rr4hKQd8qWiy+/6dA/wlY4nkvZpZLBmtXDiN+veXcCWkpaSrf74ENka6v8O/IbsXP9K4P/m9U8F2iU9JulJspVdzVqKL+6a9UHHeft8xD8PuDoi5jU7LrNaeMRv1jffzu/Hu4Ts5iK3NjUas17wiN/MLDEe8ZuZJcaJ38wsMU78ZmaJceI3M0uME7+ZWWL+PzBt3YAHrrgFAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#感觉用seaborns画出来好看一些\n",
    "import seaborn as sns\n",
    "\n",
    "# 用Seaborn画条形图\n",
    "sns.barplot(train_data['Age'].value_counts().index, train_data['Age'].value_counts().values)\n",
    "plt.title(\"age in train_data\")\n",
    "plt.xticks(range(-5,100,10))\n",
    "plt.xlabel(\"age\")\n",
    "plt.ylabel(\"num\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#年龄的数据分布还是比较正常 没有什么不均匀与异常的地方 所以年龄用平均值来填充吧  但是最后最好转成一个整数 毕竟年龄是个小数不太正常\n",
    "train_data['Age'].fillna(int(train_data['Age'].mean()),inplace=True)#填充完了 看看是否还有缺失值\n",
    "train_data['Age'].isna().any()#没有缺失值了 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n",
      "  FutureWarning\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEWCAYAAABhffzLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZ5klEQVR4nO3de5hcVZ3u8e9LAshNIKYJl9AEHcFhUJFpGRwYRPFASCLhEhgiIEicCHMgIDqI8gyoox4UUFCQEEgMmsgIuYFcgw6KIhcTSCAkKJeABEMSTmDk4gCR3/yxd1cqTXen0vbeu7rX+3meerJq39avqztvr961a21FBGZmlo6Nqi7AzMzK5eA3M0uMg9/MLDEOfjOzxDj4zcwS4+A3M0uMg9+SIOlLkq6uuo6ekjRMUkgaWHUt1vc5+C0JEfGNiPh0T/aVNFXS1/7aGsoKb0kHSlpWZB/Wtzn4zcwS4+C3Skk6R9ITkl6StFjSEXXrBki6WNLzkpZKOq1+xCxpa0mTJS2X9Kykr0ka0EU/X5Y0LW+3j7xPlPSH/PjndrHfeOA44GxJL0v6ab58R0kzJa3Ka5tQt88+kuZJ+pOkFZK+na+6K//3xfxYH+rmdRkg6aK8tieBkR3Wf0rSkvx1e1LSZ/LlWwC3Ajvmfbyc17qPpHskvZi/XpdJ2qSbb431ZxHhhx+VPYCjgR3JBiH/DLwC7JCvOwVYDAwFtgV+BgQwMF8/G7gS2ALYDrgf+EwX/XwZmJa3h+XHuQrYDHg/8Brwt13sOxX4Wt3zjYD5wHnAJsA7gSeBQ/L19wAn5O0tgX079DuwgdflFOBRYGdgEHBnh699JPAuQMCHgVeBvfN1BwLLOhzv74F9gYF5HUuAM6v+/vtRzcMjfqtURFwfEX+MiDcj4ifAY8A++epjgEsjYllEvABc0L6fpCHACLLweiUiVgLfAY7dgO6/EhF/joiFwEKyXwCN+CDQEhFfjYjXI+JJsl8i7X2/AfyNpMER8XJE3LsBNbU7BrgkIp6JiNXA/6tfGRE3R8QTkfklMBf4p64OFhHzI+LeiFgTEU+R/cL8cA/qsn7AVwhYpSR9EjiLbBQK2Qh5cN7eEXimbvP69i7AxsBySe3LNuqwzfo8V9d+Ne+7EbuQnUp5sW7ZAOBXeXsc8FXgUUlLyX7B3LQBdcFbv/an61dKOhQ4H9iN7OveHHi4q4NJ2g34NtCWbzuQ7K8WS5CD3yojaReykfJBwD0R8RdJC8hOXwAsJzvN027nuvYzZKdnBkfEmoJL7TiF7TPA0oh4d6cbRzwGjJW0EXAkMEPSOzo5TneWs+7X29rekLQpMBP4JHBDRLwhaQ5rX7fO+rkCeBAYGxEvSToTGLMB9Vg/4lM9VqUtyEJqFWRvWAJ71q2/DjhD0k6StgG+0L4iIpaTnd64WNLbJW0k6V2Sijh9sYLsPH67+4GXJH1B0mb5G7F7Svpg/nUcL6klIt4EXsz3eTP/Ot/scKyuXAdMkDRU0rbAOXXrNgE2zY+3Jh/9H9yh3ndI2rpu2VbAn4CXJb0HOLWhr9z6JQe/VSYiFgMXk70ZugJ4L3B33SZXkYX7Q2Sj1VuANcBf8vWfJAvBxcALwAxghwJKnQzskV8RMyci/gKMAvYClgLPA1cD7UE7HHhE0svApcCx+XsJrwJfB+7Oj7VvN31eBdxO9t7DA8Cs9hUR8RIwgeyXwwvAJ4Ab69Y/ClwLPJn3syPw+Xy7l/Jj/+SveD2sj1OEb8RifUM+sp0YEbtUXYtZX+YRvzWt/DTKCEkDJe1E9mbm7KrrMuvrHPzWzAR8hex0xoNk156fV2lFvUjSxLoPWdU/JlZdm/VvPtVjZpYYj/jNzBLTJ67jHzx4cAwbNqzqMszM+pT58+c/HxEtHZf3ieAfNmwY8+bNq7oMM7M+RdLTnS33qR4zs8Q4+M3MEuPgNzNLjIPfzCwxDn4zs8Q4+M3MEuPgNzNLjIPf
zCwxDn4zs8T0iU/uWnP6zOzhtfaVR9zW8H4j5mQ30rrl8G/2ek1mtn4e8ZuZJcbBb2aWGAe/mVliHPxmZolx8JuZJcbBb2aWGAe/mVliHPxmZokpLPglTZG0UtKiDstPl/SopEckfauo/s3MrHNFjvinAsPrF0j6CDAaeH9E/B1wUYH9m5lZJwoL/oi4C1jdYfGpwAUR8Vq+zcqi+jczs86VfY5/N+CfJN0n6ZeSPtjVhpLGS5onad6qVatKLNF66sgbhnPkDcPXv6GZVars4B8IDAL2Bf4NuE6SOtswIiZFRFtEtLW0tJRZo5lZv1Z28C8DZkXmfuBNYHDJNZiZJa3s4J8DfARA0m7AJsDzJddgZpa0wubjl3QtcCAwWNIy4HxgCjAlv8TzdeDEiIiiajAzs7cqLPgjYmwXq44vqk8zM1s/f3LXzCwxDn4zs8Q4+M3MEuPgNzNLjIPfzCwxDn4zs8Q4+K1fGzVzKqNmTq26DLOm4uA3M0uMg9/MLDEOfjOzxDj4zcwS4+A3M0uMg9/MLDEOfjOzxDj4zcwSU1jwS5oiaWV+05WO6z4nKST5totmZiUrcsQ/FRjecaGknYGDgT8U2LeZmXWhsOCPiLuA1Z2s+g5wNuBbLpqZVaDUc/ySRgPPRsTCBrYdL2mepHmrVq0qoTozszSUFvySNge+BJzXyPYRMSki2iKiraWlpdjizMwSUuaI/13ArsBCSU8BQ4EHJG1fYg1mZskbWFZHEfEwsF378zz82yLi+bJqMDOzYi/nvBa4B9hd0jJJ44rqy8zMGlfYiD8ixq5n/bCi+jYzs675k7tmZolx8JuZJcbBb2aWGAe/mVliHPxmZolx8JuZJaa0D3BZc7vo2kNq7c+Pvb3CSsysaB7xm5klxsFvZpYYB7+ZWWIc/GZmiXHwm5klxsFvZpYYB7+ZWWIc/GZmiSnyRixTJK2UtKhu2YWSHpX0kKTZkrYpqn8zM+tckSP+qcDwDsvuAPaMiPcBvwe+WGD/ZmbWicKCPyLuAlZ3WDY3ItbkT+8lu+G6mZmVqMpz/CcDt1bYv5lZkiqZpE3SucAaYHo324wHxgO0traWVJlVacTsb9TatxzxpXXWjZx1aa1985FnlFaTWX9U+ohf0knAKOC4iIiutouISRHRFhFtLS0tpdVnZtbflTrilzQcOBv4cES8WmbfZmaWKfJyzmuBe4DdJS2TNA64DNgKuEPSAkkTi+rfzMw6V9iIPyLGdrJ4clH9mZlZY/zJXTOzxDj4zcwS4+A3M0uMg9/MLDEOfjOzxDj4zcwS4+A3M0tMJXP1WFoOveG0WvvW0Zets27E7PPrnm1aUkVmafOI38wsMQ5+M7PEOPjNzBLj4DczS4yD38wsMQ5+M7PEOPjNzBLj4DczS0yRd+CaImmlpEV1ywZJukPSY/m/2xbVv5mZda7IEf9UYHiHZecAP4+IdwM/z5+bmVmJCgv+iLgLWN1h8Wjgmrx9DXB4Uf2bmVnnyp6rZ0hELM/bzwFDutpQ0nhgPEBra2sJpVl/MHLm2ts633zUuAorMWtelb25GxEBRDfrJ0VEW0S0tbS0lFiZmVn/Vnbwr5C0A0D+78qS+zczS17ZwX8jcGLePhG4oeT+zcySV+TlnNcC9wC7S1omaRxwAfB/JD0GfCx/bmZmJWrozV1J2wCfBIbV7xMRE7raJyLGdrHqoMbLMzOz3tboVT23APcCDwNvFleOmZkVrdHgf1tEnFVoJWZmVopGz/H/SNK/SNohn3ZhkKRBhVZmZmaFaHTE/zpwIXAua6+9D+CdRRRlZmbFaTT4Pwf8TUQ8X2QxZmZWvEZP9TwOvFpkIWZmVo5GR/yvAAsk3Qm81r6wu8s5zVJy+Iyf19pzxjTnFcuzZ2R/sB8xZnDFlVjVGg3+OfnDzMz6uIaCPyKuWf9WZmbWFzT6yd2ldDKTZkT4qh4zsz6m0VM9bXXttwFHA76O38ysD2roqp6I+P91j2cj4hJgZLGl
mZlZERo91bN33dONyP4CKPvuXWZm1gsaDe+LWXuOfw3wFNnpHjMz62MaDf5DgaNYd1rmY4GvFlCTmZkVqNFP7s4BPg68AbycP17paaeSPivpEUmLJF0r6W09PZaZmW2YRkf8QyNieG90KGknYAKwR0T8WdJ1ZH89TO2N45uZWfcaHfH/RtJ7e7HfgcBmkgYCmwN/7MVjm5lZNxod8e8PnJR/kOs1QEBExPs2tMOIeFbSRcAfgD8DcyNibsftJI0HxgO0trZuaDdWkM/PqPvDb0B1dZhZz23Im7u9QtK2wGhgV+BF4HpJx0fEtPrtImISMAmgra3tLZ8aNjOznml0rp6ne7HPjwFLI2IVgKRZwD8C07rdy8zMekWj5/h70x+AfSVtLknAQcCSCuowM0tS6cEfEfcBM4AHgIfzGiaVXYeZWaoqmXYhIs4Hzq+ibzOz1FVxqsfMzCrk4DczS4yD38wsMQ5+M7PEOPjNzBLj4DczS4yD38wsMQ7+hH13+iF8d/ohVZfRIyNnXc7IWZdXXYZZn+TgNzNLjIPfzCwxDn4zs8Q4+M3MEuPgNzNLjIPfzCwxDn4zs8Q4+M3MElNJ8EvaRtIMSY9KWiLpQ1XUYWaWokruwAVcCtwWEWMkbQJsXlEdZmbJKT34JW0NHACcBBARrwOvl12HmVmqqhjx7wqsAn4g6f3AfOCMiHilfiNJ44HxAK2traUXaf3PqBnT6p4NqLVuGjN2ne0+PmNmrf3TMUcVXVbNMTN/V2tfd9TupfVbtOXfXA7ADl/YoeJKrF0V5/gHAnsDV0TEB4BXgHM6bhQRkyKiLSLaWlpayq7RzKzfqiL4lwHLIuK+/PkMsl8EZmZWgtKDPyKeA56R1P637EHA4rLrMDNLVVVX9ZwOTM+v6HkS+FRFdZiZJaeS4I+IBUBbFX2bmaXOn9w1M0uMg9/MLDEOfjOzxDj4zcwS4+A3M0uMg9/MLDFVXcdv1ueNnnF7ra1e/q90wqyna+0fHbnLOuu+Pnt5rb0VqrUnHLF9j/r62bWrau2PjW3hrmnZ8wOOX3eqlAcmr6y19x63XY/6subgEb+ZWWIc/GZmiXHwm5klxsFvZpYYB7+ZWWIc/GZmiXHwm5klxsFvZpaYyoJf0gBJD0q6qaoazMxSVOWI/wxgSYX9m5klqZLglzQUGAlcXUX/ZmYpq2qunkuAs4GtutpA0nhgPEBra2s5VVmnzrtuOABfPea2iiux9Zk8a+18OuOO9Hw61rnSR/ySRgErI2J+d9tFxKSIaIuItpaWlu42NTOzDVDFqZ79gMMkPQX8J/BRSdMqqMPMLEmlB39EfDEihkbEMOBY4L8i4viy6zAzS5Wv4zczS0ylN2KJiF8Av6iyBjOz1HjEb2aWGAe/mVliHPxmZolx8JuZJcbBb2aWGAe/mVliKr2cs6/44/fPrrV3/NdvNbTPwisOq7Xff+qNvV7T9T8YXmsf/anenUPnGz85pNb+0j/f3qvHTtGRM++ptcXGtfbMo9qqKAeAm3/yfK29aTfb3Tt1Va29STfb/f6yFbX2bqcN6XK75y5cWmtv/2+7drndikseqLWHnLl3Nz13beVlNwOw3Wkje7R/f+YRv5lZYhz8ZmaJcfCbmSXGwW9mlhgHv5lZYhz8ZmaJcfCbmSXGwW9mlpgq7rm7s6Q7JS2W9IikM8quwcwsZVV8cncN8LmIeEDSVsB8SXdExOIKajEzS04V99xdHhEP5O2XgCXATmXXYWaWqkrn6pE0DPgAcF8n68YD4wFaW1vLLawbyy4bV2sPPW1yj45x96RRtfZ+42/izquyuUQ+8i83M3fyiNq6g8fdss5+c6YcCsDhJ9+6zvIfT107t84nTup6bp2JP1q73SkneA6eeqNmXAfATWOOqbiSvu/p7zxXa+/y2e17/fgrvvtrAIZM2H+d5Su/N7fW3u70g3u93/6ksjd3JW0JzATOjIg/dVwfEZMioi0i2lpaWsov0Mysn6ok+CVt
TBb60yNiVhU1mJmlqoqregRMBpZExLfL7t/MLHVVjPj3A04APippQf4Ysb6dzMysd5T+5m5E/BpQ2f2amVnGn9w1M0uMg9/MLDEOfjOzxDj4zcwS4+A3M0uMg9/MLDEOfjOzxFQ6SVsZVl0xrdZuOfV4Vk2ckrVPOZlVE6+orQv+XGtvd8pZPHfFfwCw/an/3u3xl37vcAB2PX0OSy4fvXZFh1+pv73y42ufNPgphpsnH1prjxx3azdbruuaqdkEVSeeNJerf7h2YrYyPz1x6A3H1T3btkfHGDnrorpnG3e93cwr65717o/0YTNuqLVvHDOaw2bcnLdHdrvfETPvAmD2UQd0u93RMx8C4Pqj3tftdp+fvQyAi44Y2n3BdX48c1Wt/Ymjen++q0VXrgBgz88M6dH+z138u1p7+8/tznPffjhrn/XedbZbcenaORyHnPEP66xb+b07G+pr5eVz1j7RG7Xmdv96dKPl9ise8ZuZJcbBb2aWGAe/mVliHPxmZolx8JuZJcbBb2aWGAe/mVliHPxmZomp6p67wyX9TtLjks6pogYzs1RVcc/dAcDlwKHAHsBYSXuUXYeZWaqqGPHvAzweEU9GxOvAfwKj17OPmZn1EkVEuR1KY4DhEfHp/PkJwD9ExGkdthsPjM+f7gksKrXQtxoMPF9xDdAcdTRDDdAcdTRDDdAcdTRDDdAcdTRDDQC7R8RWHRc27SRtETEJmAQgaV5EtFVZTzPU0Cx1NEMNzVJHM9TQLHU0Qw3NUkcz1NBeR2fLqzjV8yywc93zofkyMzMrQRXB/1vg3ZJ2lbQJcCxwYwV1mJklqfRTPRGxRtJpwO3AAGBKRDyynt0mFV/ZejVDDdAcdTRDDdAcdTRDDdAcdTRDDdAcdTRDDdBFHaW/uWtmZtXyJ3fNzBLj4DczS4yDvwNJUyStlNTp5waU+W4+3cRDkvYuql9JF0p6NO9ntqRtuti3V6bAkLSzpDslLZb0iKQz8uWDJN0h6bH8305voivpxHybxySdWEAde0m6V9ICSfMk7VNUHZLeJul+SQvzGr6SL58qaWlewwJJexVVQ92xBkh6UNJN+fPp+fd7Uf5z0+kNiXu5hqckPdz+2ufL/iP/2Vwgaa6kHYusQ9I2kmbk/yeWSPpQ2TX0GxHhR90DOADYG1jUxfoRwK1kty7fF7ivqH6Bg4GBefubwDc72W8A8ATwTmATYCGwRw9r2AHYO29vBfyebFqNbwHn5MvP6aKOQcCT+b/b5u1te7mOucChdd+HXxRVR/793TJvbwzcl3+/pwJj1rNvr70W+fHOAn4M3FT3tSt/XAucWkINTwGDOyx7e117AjCx4J+La4BP5+1NgG3KrqG/PDzi7yAi7gJWd7PJaOCHkbkX2EbSDkX0GxFzI2JN/vRess88dNRrU2BExPKIeCBvvwQsAXbKj3dNvtk1wOGd7H4IcEdErI6IF4A7gOG9XEcAb8832xr4Y1F15N/fl/OnG+ePRq+E6LXXQtJQYCRwdV1tt+T1BXA/nf9c9FoNXYmIP9U93YLOX59eqUPS1mSDo8l5369HxItl1tCfOPg33E7AM3XPl+XLinYy2V8apdQjaRjwAbKR7pCIWJ6veg4YUlEdZwIXSnoGuAj4YpF15KdYFgAryYLjvnzV1/PTC9+RtGmRNQCXAGcDb3ZS38bACcBtBdcAWaDOlTRf2XQq7TV8Pf9+HAecV2AduwKrgB/kp72ulrRFyTX0Gw7+PkDSucAaYHpJ/W0JzATO7DCiIh9llnINcCd1nAp8NiJ2Bj5LPvorSkT8JSL2IhtR7yNpT7JfNu8BPkh26uALRfUvaRSwMiLmd7HJ94G7IuJXRdVQZ/+I2JtsVt3/K+kAgIg4N/9+TAdO6+4Af6WBZKdCr4iIDwCvkJ12LLOGfsPBv+FKnXJC0knAKOC4PHQLrScfRc4EpkfErHzxivbTWfm/Kyuq40SgvX092WmuQusAiIgXgTvJJhdcnp9l
eQ34QcE17AccJukpslN4H5U0DUDS+UAL2fn/zvTq6xARz+b/rgRm89avezpwVIF1LAOW1f3VNYPsF0GZNfQfVb/J0IwPYBhdv7k7knXf3L2/qH7JzkMuBlq62Wcg2ZtVu7L2zd2/62H/An4IXNJh+YWs++butzrZdxCwlOzNs23z9qBermMJcGDePgiYX1QdZKG6Td7eDPgV2S/gHepqvAS4oMjXou6YB7L2zd1PA78BNutm+978fmwBbFXX/k3+s/nuum1OB2YUXMevyGabBPhy/nNZag395VF5Ac32ILtKYjnwBtkoYxxwCnBKvl5kN5J5AngYaCuw38fJzk0uyB8T8213BG6p23cE2ZUvTwDn/hU17E92Guehuj5HAO8Afg48Bvys/T8N0AZcXbf/yXnNjwOfKqCO/YH5ZL/c7gP+vqg6gPcBD+Y1LALOy5f/V/59XwRMY+2VP4W8FnXHO5C1wb8m/163vzbnFVkD2RVjC/PHI+0/Y2R/kS3KX6OfAjsVXMdewLy8vzlkIV5qDf3l4SkbzMwS43P8ZmaJcfCbmSXGwW9mlhgHv5lZYhz8ZmaJcfCbmSXGwW9mlhgHv9l6SJqTT072SPsEZZLGSfp9Pmf/VZIuy5e3SJop6bf5Y79qqzd7K3+Ay2w9JA2KiNWSNgN+SzbN791kc8W8RPZp3oURcZqkHwPfj4hfS2oFbo+Iv62seLNODKy6ALM+YIKkI/L2zmRTIf8yIlYDSLoe2C1f/zFgD0nt+75d0paxdm5/s8o5+M26IelAsjD/UES8KukXwKNAV6P4jYB9I+J/SinQrAd8jt+se1sDL+Sh/x6yGVm3AD4saVtJA1l3KuC5ZLNEAtl9gsss1qwRDn6z7t0GDJS0BLiA7BaYzwLfILvt4d1k96P973z7CUBbfoeuxWQzu5o1Fb+5a9YD7eft8xH/bGBKRMyuui6zRnjEb9YzX87vx7uI7MYecyqtxmwDeMRvZpYYj/jNzBLj4DczS4yD38wsMQ5+M7PEOPjNzBLzv3GlrmwH/90cAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#同理看一下测试集的年龄分布\n",
    "# 用Seaborn画条形图\n",
    "sns.barplot(test_data['Age'].value_counts().index, test_data['Age'].value_counts().values)\n",
    "plt.title(\"age in test_data\")\n",
    "plt.xticks(range(-5,100,10))\n",
    "plt.xlabel(\"age\")\n",
    "plt.ylabel(\"num\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#年龄的数据分布还是比较正常 没有什么不均匀与异常的地方 所以年龄用平均值来填充吧  但是最后最好转成一个整数 毕竟年龄是个小数不太正常\n",
    "test_data['Age'].fillna(int(test_data['Age'].mean()),inplace=True)#填充完了 看看是否还有缺失值\n",
    "test_data['Age'].isna().any()#没有缺失值了 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 891 entries, 0 to 890\n",
      "Data columns (total 11 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   PassengerId  891 non-null    int64  \n",
      " 1   Survived     891 non-null    int64  \n",
      " 2   Pclass       891 non-null    int64  \n",
      " 3   Name         891 non-null    object \n",
      " 4   Sex          891 non-null    object \n",
      " 5   Age          891 non-null    float64\n",
      " 6   SibSp        891 non-null    int64  \n",
      " 7   Parch        891 non-null    int64  \n",
      " 8   Ticket       891 non-null    object \n",
      " 9   Fare         891 non-null    float64\n",
      " 10  Embarked     889 non-null    object \n",
      "dtypes: float64(2), int64(5), object(4)\n",
      "memory usage: 76.7+ KB\n"
     ]
    }
   ],
   "source": [
    "#Cabin确实的数据太多 不如直接删除了吧 而且都是一些 字母编号之类的 感觉很麻烦 确实太多了 \n",
    "train_data.drop(columns = ['Cabin'],inplace=True)\n",
    "test_data.drop(columns = ['Cabin'],inplace=True)#删除操作\n",
    "train_data.info()#已经没有Cabin了  #训练集中Embarked还有两个缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 418 entries, 0 to 417\n",
      "Data columns (total 10 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   PassengerId  418 non-null    int64  \n",
      " 1   Pclass       418 non-null    int64  \n",
      " 2   Name         418 non-null    object \n",
      " 3   Sex          418 non-null    object \n",
      " 4   Age          418 non-null    float64\n",
      " 5   SibSp        418 non-null    int64  \n",
      " 6   Parch        418 non-null    int64  \n",
      " 7   Ticket       418 non-null    object \n",
      " 8   Fare         417 non-null    float64\n",
      " 9   Embarked     418 non-null    object \n",
      "dtypes: float64(2), int64(4), object(4)\n",
      "memory usage: 32.8+ KB\n"
     ]
    }
   ],
   "source": [
    "test_data.info()#已经没有Cabin了  测试集中还有一个缺失值 Fare  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/stu_15527388015/.local/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n",
      "  FutureWarning\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAATXElEQVR4nO3df7RdZX3n8fenRJQWNCB3UkyCsZLa0loRM4C1OlacFhinoS1ldHVKcOHkH7DttNMWx7Uqjjr9MWuGwtjSxZJqaFWkVEtqWVYMUmecQg2KWMCWNIJJyo/LT2VAHfA7f5wn9uSSm3Nu7o8THt6vtc46ez/Ps/f+7rOzPnff55xzk6pCktSX75p0AZKkhWe4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7loUST6Q5N0LuL8LkvzJAuzntUl27qP/0STfN9/j7K8ktyZ57QLvc0GvhZ4eDHd9R5I7kzzeAm73472TrmspVdWhVbV9rtslWZOkkiyb5/F/qKqun88+5iPJ9UneMqnja+HM6x+iuvRvq+pTky5i2HwD80CRZFlVPTHpOvTM4J27xpLk7CSfTXJhkoeTbE/yo619R5L7kmyYsdmRSa5N8vUkf53khUP7u6ht97UkNyV59VDfBUmuSvInSb4GnD2jlmcl+XCSP0tycJIXtOXpJF9J8otDYw9p0xIPJbkN+JcjzrOSHNOWP5Dk95P8ZTuHG5O8eJZNP9OeH26/8bxyxmv2AHBBkhcnuS7JA0nuT/LBJMuHjn9nktcPvQ5XJrm8Hf/WJOv2VX/b7uVJPt+2+QjwnKG+w5N8vL1WD7XlVa3vPcCrgfcO/9a2r2ulA5fhrrk4EbgFeD7wIeAKBmF5DPDvGYTCoUPjfx54F3AkcDPwwaG+zwHHAUe0ff1pkucM9a8HrgKWD2+X5BDgz4FvAmcCTwB/AXwRWAmcDPxykp9sm7wDeHF7/CQw8wfQKG8E3gkcDmwD3jPLuNe05+Vtaudv2vqJwHZgRds2wG8BLwB+EFgNXLCP4/8Ug9d5ObAZ2Oc0WZKDGbw+f8zgtf1T4GeHhnwX8H7ghcDRwOO791lVbwf+F3BeO4fz2jajrpUORFXlwwdVBXAn8Cjw8NDjP7S+s4E7hsa+FChgxVDbA8BxbfkDwBVDfYcCTwKrZzn2Q8DL2vIFwGdm9F/AINz+GrgYSGs/EfjqjLFvA97flrcDpwz1bQR27uM1KOCYoXN431DfacCXZ9luTdt22VDb2TNr28t2pwNfmHENXj90zp8a6jsWeHzE/l4D/NPu16e1/R/g3bOMPw54aGj9euAtI47xnWvl48B9dDGXqQV1es0+537v0PLjAFU1s234zn3H7oWqejTJgwzuWHck+U/AOW29gOcyuMN/yrZDTgKeBbypWsowuAN9QZKHh8YdxOAOlN3HG+q7a5Zzm809Q8uPsef5jWOP80iyAriIwfTHYQzupB+aw/GfM2Lu/gXArqHXB4bOOcl3AxcCpzD4bQTgsCQHVdWTe9vhGNdKByCnZbSYVu9eaNM1RwD/1OZsf53BtMrhVbUceITBlMVue/tb1J9kMKWxpYUkDMLzK1W1fOhxWFWd1vrvHq6DwVTEYpjtb2fPbP+vre2lVfVcBtNZecpW++9uYGWS4X0On/OvAi8BTmzH3z2dtHv8HvWOea10ADLctZhOS/JjbR74XcANVbWDwR3rE8A0sCzJbzK4Gxypqn6XwbzvliRHAn8LfD3Jb7Q3Tw9K8sNJdr9xeiXwtvZG4irgrQt7it8xDXwbGPUZ+cMYTH09kmQl8GsLXMffMHhtf7G98fwzwAkzjv84gzd+j2DwnsSwe9nzHPb7WmmyDHfN9BfZ83PuH5vHvj7EIDweBF7B4C4V4K+ATwD/wGDK4BvsfRpmr6rqXQzeNPwU8DzgDQzmjr8C3A+8r7XD4M3Qu1rfJxm80bjgquoxBm+YfrZ9muikWYa+Ezie
wd3vXwIfXeA6vgX8DIP5/geBfzfjGL8HHMLgdbqBwXUYdhFwRvskzcXM81ppcrLn1JwkqQfeuUtSh/y0jPQ0k+Ro4LZZuo+tqq8uZT06MDktI0kdOiDu3I888shas2bNpMuQpKeVm2666f6qmtpb3wER7mvWrGHr1q2TLkOSnlaSzPqlPN9QlaQOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDh0Q31Cdi1f82uWTLuEZ4ab/dtakS5A0D965S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SerQWOGeZHmSq5J8OcntSV6Z5Igk1ya5oz0f3sYmycVJtiW5Jcnxi3sKkqSZxr1zvwj4RFX9APAy4HbgfGBLVa0FtrR1gFOBte2xEbhkQSuWJI00MtyTPA94DXAZQFV9q6oeBtYDm9qwTcDpbXk9cHkN3AAsT3LUAtctSdqHce7cXwRMA+9P8oUk70vyPcCKqrq7jbkHWNGWVwI7hrbf2dr2kGRjkq1Jtk5PT+//GUiSnmKccF8GHA9cUlUvB/4v/zwFA0BVFVBzOXBVXVpV66pq3dTU1Fw2lSSNME647wR2VtWNbf0qBmF/7+7plvZ8X+vfBawe2n5Va5MkLZGR4V5V9wA7krykNZ0M3AZsBja0tg3A1W15M3BW+9TMScAjQ9M3kqQlMO7/xPRW4INJDga2A29m8IPhyiTnAHcBZ7ax1wCnAduAx9pYSdISGivcq+pmYN1euk7ey9gCzp1fWZKk+fAbqpLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1aKxwT3Jnki8luTnJ1tZ2RJJrk9zRng9v7UlycZJtSW5JcvxinoAk6anmcuf+41V1XFWta+vnA1uqai2wpa0DnAqsbY+NwCULVawkaTzzmZZZD2xqy5uA04faL6+BG4DlSY6ax3EkSXM0brgX8MkkNyXZ2NpWVNXdbfkeYEVbXgnsGNp2Z2uTJC2RZWOO+7Gq2pXkXwDXJvnycGdVVZKay4HbD4mNAEcfffRcNpUkjTDWnXtV7WrP9wEfA04A7t093dKe72vDdwGrhzZf1dpm7vPSqlpXVeumpqb2/wwkSU8xMtyTfE+Sw3YvAz8B/B2wGdjQhm0Arm7Lm4Gz2qdmTgIeGZq+kSQtgXGmZVYAH0uye/yHquoTST4HXJnkHOAu4Mw2/hrgNGAb8Bjw5gWvWpK0TyPDvaq2Ay/bS/sDwMl7aS/g3AWpTpK0X/yGqiR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdGjvckxyU5AtJPt7WX5TkxiTbknwkycGt/dltfVvrX7NItUuSZjGXO/dfAm4fWv8d4MKqOgZ4CDintZ8DPNTaL2zjJElLaKxwT7IK+DfA+9p6gNcBV7Uhm4DT2/L6tk7rP7mNlyQtkXHv3H8P+HXg2239+cDDVfVEW98JrGzLK4EdAK3/kTZekrRERoZ7kjcA91XVTQt54CQbk2xNsnV6enohdy1Jz3jj3Lm/CvipJHcCVzCYjrkIWJ5kWRuzCtjVlncBqwFa//OAB2butKourap1VbVuampqXichSdrTyHCvqrdV1aqqWgO8Ebiuqn4e+DRwRhu2Abi6LW9u67T+66qqFrRqSdI+zedz7r8B/EqSbQzm1C9r7ZcBz2/tvwKcP78SJUlztWz0kH9WVdcD17fl7cAJexnzDeDnFqA2SdJ+8huqktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCX
pA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHVoZLgneU6Sv03yxSS3Jnlna39RkhuTbEvykSQHt/Znt/VtrX/NIp+DJGmGce7cvwm8rqpeBhwHnJLkJOB3gAur6hjgIeCcNv4c4KHWfmEbJ0laQiPDvQYebavPao8CXgdc1do3Aae35fVtndZ/cpIsVMGSpNHGmnNPclCSm4H7gGuBfwQerqon2pCdwMq2vBLYAdD6HwGev4A1S5JGGCvcq+rJqjoOWAWcAPzAfA+cZGOSrUm2Tk9Pz3d3kqQhc/q0TFU9DHwaeCWwPMmy1rUK2NWWdwGrAVr/84AH9rKvS6tqXVWtm5qa2r/qJUl7Nc6nZaaSLG/LhwD/GridQcif0YZtAK5uy5vbOq3/uqqqBaxZkjTCstFDOArYlOQgBj8Mrqyqjye5DbgiybuBLwCXtfGXAX+cZBvwIPDGRahbkrQPI8O9qm4BXr6X9u0M5t9ntn8D+LkFqU6StF/8hqokdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDI8M9yeokn05yW5Jbk/xSaz8iybVJ7mjPh7f2JLk4ybYktyQ5frFPQpK0p3Hu3J8AfrWqjgVOAs5NcixwPrClqtYCW9o6wKnA2vbYCFyy4FVLkvZpZLhX1d1V9fm2/HXgdmAlsB7Y1IZtAk5vy+uBy2vgBmB5kqMWunBJ0uzmNOeeZA3wcuBGYEVV3d267gFWtOWVwI6hzXa2tpn72phka5Kt09PTc61bkrQPY4d7kkOBPwN+uaq+NtxXVQXUXA5cVZdW1bqqWjc1NTWXTSVJI4wV7kmexSDYP1hVH23N9+6ebmnP97X2XcDqoc1XtTZJ0hIZ59MyAS4Dbq+q/zHUtRnY0JY3AFcPtZ/VPjVzEvDI0PSNJGkJLBtjzKuAXwC+lOTm1vafgd8GrkxyDnAXcGbruwY4DdgGPAa8eSELliSNNjLcq+p/A5ml++S9jC/g3HnWJUmaB7+hKkkdMtwlqUOGuyR1yHCXpA6N82kZacF89b+8dNIldO/o3/zSpEvQAcA7d0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHRoZ7kn+KMl9Sf5uqO2IJNcmuaM9H97ak+TiJNuS3JLk+MUsXpK0d+PcuX8AOGVG2/nAlqpaC2xp6wCnAmvbYyNwycKUKUmai5HhXlWfAR6c0bwe2NSWNwGnD7VfXgM3AMuTHLVAtUqSxrS/c+4rqurutnwPsKItrwR2DI3b2dqeIsnGJFuTbJ2ent7PMiRJezPvN1SrqoDaj+0urap1VbVuampqvmVIkobsb7jfu3u6pT3f19p3AauHxq1qbZKkJbS/4b4Z2NCWNwBXD7Wf1T41cxLwyND0jSRpiSwbNSDJh4HXAkcm2Qm8A/ht4Mok5wB3AWe24dcApwHbgMeANy9CzZKkEUaGe1W9aZauk/cytoBz51uUJGl+/IaqJHVo5J27JAG86n++atIlPCN89q2fXZD9eOcuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1KFFCfckpyT5+yTbkpy/GMeQJM1uwcM9yUHA7wOnAscCb0py7EIfR5I0u8W4cz8B2FZV26vqW8AVwPpFOI4kaRapqoXdYXIGcEpVvaWt/wJwYlWdN2PcRmBjW30J8PcL
WsiB5Ujg/kkXof3itXt66/36vbCqpvbWsWypK9mtqi4FLp3U8ZdSkq1VtW7SdWjuvHZPb8/k67cY0zK7gNVD66tamyRpiSxGuH8OWJvkRUkOBt4IbF6E40iSZrHg0zJV9USS84C/Ag4C/qiqbl3o4zzNPCOmnzrltXt6e8ZevwV/Q1WSNHl+Q1WSOmS4S1KHDPdFlOTtSW5NckuSm5OcOOmaNL4k35vkiiT/mOSmJNck+f5J16XRkqxKcnWSO5JsT/LeJM+edF1LyXBfJEleCbwBOL6qfgR4PbBjslVpXEkCfAy4vqpeXFWvAN4GrJhsZRqlXbuPAn9eVWuBtcAhwO9OtLAlNrEvMT0DHAXcX1XfBKiqnr8l16MfB/5fVf3h7oaq+uIE69H4Xgd8o6reD1BVTyb5j8BdSd5eVY9Otryl4Z374vkksDrJPyT5gyT/atIFaU5+GLhp0kVov/wQM65dVX0NuBM4ZhIFTYLhvkja3cErGPz9nGngI0nOnmhRkp4xDPdFVFVPVtX1VfUO4DzgZyddk8Z2K4Mfznr6uY0Z1y7Jc4Hvpe8/ULgHw32RJHlJkrVDTccBd02oHM3ddcCz218vBSDJjyR59QRr0ni2AN+d5Cz4zv8x8d+B91bV4xOtbAkZ7ovnUGBTktuS3MLgPy65YLIlaVw1+Or2TwOvbx+FvBX4LeCeyVamUYau3RlJ7gAeAL5dVe+ZbGVLyz8/IKlrSX4U+DDw01X1+UnXs1QMd0nqkNMyktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUP/H3W5futd2Lj8AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of how often each Embarked value occurs in the training set.\n",
    "# value_counts() is computed once and reused for both axes.\n",
    "embarked_counts = train_data['Embarked'].value_counts()\n",
    "# Pass x/y by keyword: seaborn >= 0.12 only accepts `data` positionally, so the\n",
    "# original positional call raises TypeError there (and warns on 0.11).\n",
    "sns.barplot(x=embarked_counts.index, y=embarked_counts.values)\n",
    "plt.title(\"Embarked in train_data\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    S\n",
      "dtype: object <class 'pandas.core.series.Series'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Given the distribution above, fill missing Embarked values with the mode (most frequent value).\n",
    "embarked_mode = train_data['Embarked'].mode()  # mode() returns a Series, not a scalar\n",
    "print(embarked_mode, type(embarked_mode))\n",
    "# Index [0] extracts the scalar fill value from that Series. The result is assigned back\n",
    "# to the column because fillna(..., inplace=True) on train_data['Embarked'] is chained\n",
    "# assignment: pandas deprecates it and it leaves train_data unchanged under Copy-on-Write.\n",
    "train_data['Embarked'] = train_data['Embarked'].fillna(embarked_mode[0])\n",
    "train_data['Embarked'].isnull().sum()  # expect 0: no missing Embarked values remain in train_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "看下data['Fare']里面的统计之后有哪些值： [7.75, 26.0, 8.05, 13.0, 7.8958, 10.5, 7.775, 7.2292, 7.225, 8.6625, 7.8542, 21.0, 26.55, 7.8792, 27.7208, 7.25, 7.925, 262.375, 211.5, 69.55, 14.5, 7.55, 7.7958, 15.2458, 55.4417, 31.3875, 31.5, 14.4542, 9.5, 221.7792, 39.0, 134.5, 16.1, 23.0, 65.0, 13.775, 13.5, 59.4, 7.7333, 83.1583, 7.05, 29.7, 20.575, 46.9, 263.0, 164.8667, 75.2417, 13.9, 151.55, 12.1833, 11.5, 0.0, 73.5, 15.5, 32.5, 36.75, 6.4375, 60.0, 57.75, 93.5, 7.0, 22.525, 15.0458, 22.025, 12.35, 7.65, 51.8625, 13.8583, 136.7792, 79.2, 23.45, 82.2667, 21.6792, 81.8583, 42.5, 106.425, 3.1708, 8.1125, 50.0, 27.4458, 15.9, 31.6833, 61.9792, 12.2875, 45.5, 52.5542, 30.0, 27.75, 16.0, 10.7083, 13.4167, 211.3375, 15.7417, 8.7125, 16.7, 9.6875, 29.0, 7.8292, 15.55, 30.5, 71.2833, 15.1, 135.6333, 512.3292, 8.9625, 29.125, 20.25, 12.7375, 61.3792, 52.0, 15.75, 42.4, 7.8208, 23.25, 56.4958, 28.5, 18.0, 12.875, 31.6792, 90.0, 51.4792, 24.15, 15.5792, 7.7792, 7.2833, 25.7417, 37.0042, 14.1083, 25.7, 146.5208, 7.8875, 8.5167, 39.6, 53.1, 9.35, 63.3583, 41.5792, 7.5792, 14.4, 50.4958, 39.4, 34.375, 7.7208, 7.85, 76.2917, 7.725, 9.225, 39.6875, 75.25, 13.8625, 6.95, 61.175, 78.85, 20.2125, 247.5208, 7.575, 28.5375, 227.525, 108.9, 6.4958, 7.6292, 47.1, 22.3583, 17.4, 9.325, 14.4583, 15.0333, 25.4667, 21.075]\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAYaklEQVR4nO3dfZQldX3n8fdHHkSBCEhnHBlgNGEx6AZkZwGP6OID44APmF0fYFHBaEY9eqInJC7GPWLMSWJO4kMSckQUAj4sGqMgERQmqCEkojY4wCAgiMPCODANCAi66sh3/7jVcml+PdPM9L13uvv9OqdOV/3qV1XfuvT0h6q6VZWqQpKkqR4z6gIkSdsmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQGpJ8OckJo65jSyU5Mcllo65Dc5sBoW1ekrVJfprk/r7hyYPcZlUdVVVnb8myXb0v3NoahvVHPsl7k3xq0NvR3LP9qAuQZuilVfUvW7JgkgCpqgdnuSZpXvMIQnNSkt2TfCnJRJIfdeNL+uZ/PcmfJfl34CfAU5M8LcmqJHcnuSHJqzax/q8neWM3fmKSy5L8dbetHyQ5aprlPgnsA/xzd6Tzzq79sCT/keSeJFclOaJvmROT3Jzkx926j0/yW8BpwLO69dyzmc/jiUnOT3Jfkm8BvzFl/t8kubWbf0WS53TtK4A/Bl7dbeeqrv31Sa7raro5yZs2tX3NU1Xl4LBND8Ba4IVT2p4I/A/g8cCuwOeA8/rmfx34v8DT6R0pPwG4FXh9N/1M4E7ggGm2+XXgjd34icAvgN8DtgPeAvyQ3lHJZusF9gLuAo6m9z9lR3bTY8DOwH3A/l3fxcDT+7Z72Qw/o88A/9it7xnAuv5lgdd0n9n2wEnA7cBO3bz3Ap+asr4X0wuZAP+NXsgePOrfBYfhDh5BaK44r/u/73uSnFdVd1XV56vqJ1X1Y+DP6P0h63dWVV1bVRuBFcDaqvqHqtpYVd8BPg+8cobbv6WqPlZVvwTOpveHfNEMl30NcGFVXVhVD1bVKmCcXmAAPAg8I8njqmp9VV07w/UCkGQ7emH5nqp6oKrWdDX+SlV9qvvMNlbVB4DHAvtPt86quqCqvl89/wpcDDzn0dSluc+A0Fzx8qrarRtenuTxST6a5JYk9wGXArt1fywn3do3vi9waF/I3AMcDzxphtu/fXKkqn7Sje4yw2X3BV45ZduHA4ur6gHg1cCbgfVJLkjytBmud9IYvSOD/v29pb9Dkj/sThnd223/CcCe060wyVFJLu9Ox91DL8ym7a/5yYDQXHUSvf8DPrSqfg14bteevj79z7K/FfjXvpDZrap2qaq3DKC2qc/QvxX45JRt71xV7weoqouq6kh6RyXXAx+bZj3TmQA2Anv3te0zOdJdb3gn8Cpg96raDbiXhz6rh20nyWPpHV39NbCo638hD/9stQAYEJqrdgV+CtyTZA/glM30/xLwn5K8NskO3fBfu4vBs+0O4Kl9058CXprkRUm2S7JTkiOSLEmyKMkxSXYGfgbcT++U0+R6liTZcVMb6057fQF4b3dkdQDQfw/HrvQCZALYPsl7gF+bUu/SJJN/D3akdwpqAtjYXZBf/qg/Bc15BoTmqg8Dj6N3ofly4Cub6txdp1gOHEvvAvPtwF/S+0M42/4C+N/d6aQ/rKpbgWPofVtogt4RxR/R+/f3GOAPuprupncdZfKo5qvAtcDtSe7czDbfRu+U1+3AWcA/9M27iN7n8z16p57+Hw8/HfW57uddSa7sPqvfp3fR+0fA/wTOfxT7r3kiVb5RTpL0SB5BSJKaDAhpjkhy7ZTHjUwOx4+6Ns1PnmKSJDXNq2cx7bnnnrV06dJRlyFJc8YVV1xxZ1WNtebNq4BYunQp4+Pjoy5DkuaMJLdMN89rEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYD
YhpLT75g1CVI0kgZEJKkJgNCktRkQEiSmgwISVKTASFJahpYQCTZO8nXkny3e1Xi27v2PZKsSnJj93P3aZY/oetzY5ITBlWnJKltkEcQG4GTquoA4DDgrUkOAE4GLqmq/YBLuumHSbIHcApwKHAIcMp0QSJJGoyBBURVra+qK7vxHwPXAXsBxwBnd93OBl7eWPxFwKqquruqfgSsAlYMqlZJ0iMN5RpEkqXAM4FvAouqan0363ZgUWORvYBb+6Zv69okSUMy8IBIsgvweeAdVXVf/7yqKqC2cv0rk4wnGZ+YmNiaVUmS+gw0IJLsQC8cPl1VX+ia70iyuJu/GNjQWHQdsHff9JKu7RGq6vSqWlZVy8bGxmaveEla4Ab5LaYAZwDXVdUH+2adD0x+K+kE4IuNxS8ClifZvbs4vbxrkyQNySCPIJ4NvBZ4fpLV3XA08H7gyCQ3Ai/spkmyLMnHAarqbuBPgW93w/u6NknSkGw/qBVX1WVAppn9gkb/ceCNfdNnAmcOpjpJ0uZ4J7UkqcmAkCQ1GRCSpCYDQpLUZEDMAl9PKmk+MiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1DeyFQUnOBF4CbKiqZ3RtnwX277rsBtxTVQc1ll0L/Bj4JbCxqpYNqk5JUtvAAgI4CzgV+MRkQ1W9enI8yQeAezex/POq6s6BVSdJ2qRBvnL00iRLW/OSBHgV8PxBbV+StHVGdQ3iOcAdVXXjNPMLuDjJFUlWbmpFSVYmGU8yPjExMeuFStJCNaqAOA44ZxPzD6+qg4GjgLcmee50Havq9KpaVlXLxsbGZrtOSVqwhh4QSbYH/jvw2en6VNW67ucG4FzgkOFUJ0maNIojiBcC11fVba2ZSXZOsuvkOLAcWDPE+iRJDDAgkpwDfAPYP8ltSd7QzTqWKaeXkjw5yYXd5CLgsiRXAd8CLqiqrwyqTklS2yC/xXTcNO0nNtp+CBzdjd8MHDiouiRJM+Od1JKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNQ3yjXJnJtmQZE1f23uTrEuyuhuOnmbZFUluSHJTkpMHVaMkaXqDPII4C1jRaP9QVR3UDRdOnZlkO+DvgaOAA4DjkhwwwDolSQ0DC4iquhS4ewsWPQS4qapurqqfA58BjpnV4iRJmzWKaxBvS3J1dwpq98b8vYBb+6Zv69qakqxMMp5kfGJiYrZrlaQFa9gB8RHgN4CDgPXAB7Z2hVV1elUtq6plY2NjW7s6SVJnqAFRVXdU1S+r6kHgY/ROJ021Dti7b3pJ1yZJGqKhBkSSxX2TvwOsaXT7NrBfkqck2RE4Fjh/GPVJkh6y/aBWnOQc4AhgzyS3AacARyQ5CChgLfCmru+TgY9X1dFVtTHJ24CLgO2AM6vq2kHVKUlqG1hAVNVxjeYzpun7Q+DovukLgUd8BVaSNDzeSS1JajIgJElNBoQkqcmAkCQ1GRCSpCYDQpLUZEBIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaBhYQSc5MsiHJmr62v0pyfZKrk5ybZLdpll2b5Jokq5OMD6pGSdL0BnkEcRawYkrbKuAZVfXbwPeAd21i+edV1UFVtWxA9UmSNmFgAVFVlwJ3T2m7uKo2dpOXA0sGtX1J0tYZ5TWI3wW+PM28Ai5OckWSlUOsSZLUGdg7qTclybuBjcCnp+lyeFWtS/LrwKok13dHJK11rQRWAuyzzz4DqVeSFqKhH0EkORF4CXB8VVWrT1Wt635uAM4FDplufVV1elUtq6plY2NjA6hYkhamoQZEkhXAO4GXVdVPpumzc5JdJ8eB5cCaVl9J0uAM8muu5wDfAPZPcluSNwCnArvSO220OslpXd8nJ7mwW3QRcFmS
q4BvARdU1VcGVackqW1g1yCq6rhG8xnT9P0hcHQ3fjNw4KDqkiTNjHdSS5KaZnQE0d3x/Dpgaf8yVfX7A6lKkjRyMz3FdCG9G9uuAR4cXDmSpG3FTANip6r6g4FWIknapsz0GsQnk/xeksVJ9pgcBlqZJGmkZnoE8XPgr4B303sMBt3Ppw6iKEnS6M00IE4CfrOq7hxkMZKkbcdMTzHdBDTvfJYkzU8zPYJ4AFid5GvAzyYb/ZqrJM1fMw2I87pBkrRAzCggqursQRciSdq2zPRO6h/w0LeXfqWq/BaTJM1TMz3F1P9e6J2AVwLeByFJ89iMvsVUVXf1Deuq6sPAiwdbmiRplGZ6iungvsnH0DuiGMnrSiVJwzHTP/If4KFrEBuBtfROM0mS5qmZ3ih3FL2X/VwC/DuwDjh2cwslOTPJhiRr+tr2SLIqyY3dz92nWfaErs+NSU6YYZ2SpFky04A4D3gp8Avg/m54YAbLnQWsmNJ2MnBJVe1HL3BOnrpQ9yDAU4BDgUOAU6YLEknSYMz0FNOSqpr6h36zqurSJEunNB8DHNGNnw18HfhfU/q8CFhVVXcDJFlFL2jOebQ1SJK2zEyPIP4jyX+epW0uqqr13fjtwKJGn72AW/umb+vaHiHJyiTjScYnJiZmqURJ0kwD4nDgiiQ3JLk6yTVJrt7ajVdV0bgB71Gu4/SqWlZVy8bGxra2JElSZ6anmI6axW3ekWRxVa1PshjY0OizjodOQwEsoXcqSpI0JDO9Ue6W1rCF2zwfmPxW0gnAFxt9LgKWJ9m9uzi9vGuTJA3JTE8xbZEk5wDfAPZPcluSNwDvB45MciPwwm6aJMuSfByguzj9p8C3u+F9kxesJUnDMdC7oavquGlmvaDRdxx4Y9/0mcCZAypNkrQZAz2CkCTNXQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQEiSmgyILbT05AtGXYIkDZQBIUlqGnpAJNk/yeq+4b4k75jS54gk9/b1ec+w65SkhW6gb5RrqaobgIMAkmwHrAPObXT9t6p6yRBLkyT1GfUpphcA36+qW0ZchyRpilEHxLHAOdPMe1aSq5J8OcnTp1tBkpVJxpOMT0xMDKZKSVqARhYQSXYEXgZ8rjH7SmDfqjoQ+DvgvOnWU1WnV9Wyqlo2NjY2kFolaSEa5RHEUcCVVXXH1BlVdV9V3d+NXwjskGTPYRcoSQvZKAPiOKY5vZTkSUnSjR9Cr867hlibJC14Q/8WE0CSnYEjgTf1tb0ZoKpOA14BvCXJRuCnwLFVVaOoVZIWqpEERFU9ADxxSttpfeOnAqcOuy5J0kNG/S0mSdI2yoCQJDUZEJKkJgNCktRkQEiSmgwISVKTASFJajIgJElNBoQkqcmAmCW+o1rSfGNASJKaDAhJUpMBIUlqMiAkSU0GhCSpaZTvpF6b5Jokq5OMN+Ynyd8muSnJ1UkOHkWdkrRQjeSFQX2eV1V3TjPvKGC/bjgU+Ej3U5I0BNvyKaZjgE9Uz+XAbkkWj7ooSVooRhkQBVyc5IokKxvz9wJu7Zu+rWt7mCQrk4wnGZ+YmBhQqdLs8aZKzRWjDIjDq+pgeqeS3prkuVuykqo6vaqWVdWysbGx2a1QkhawkQVEVa3rfm4AzgUOmdJlHbB33/SSrk2SNAQjCYgkOyfZdXIcWA6smdLtfOB13beZDgPurar1Qy5VkhasUX2LaRFwbpLJGv5PVX0lyZsBquo04ELgaOAm4CfA60dUqyQtSCMJiKq6GTiw0X5a33gBbx1mXZKkh2zLX3OVJI2QASFJajIgJElNBoQkqcmAmKFt4e7XbaGG+cTPU9o0A0KS1GRASJKaDAhJUpMBIUlqMiAkSU0GhCSpyYCQJDUZEJKkJgNCktRkQGzC1DttNzc9XdvWbPfRrM87g2eHn6PUY0BIkpqGHhBJ9k7ytSTfTXJtkrc3+hyR
5N4kq7vhPcOuU5IWulG8UW4jcFJVXdm9l/qKJKuq6rtT+v1bVb1kBPVJkhjBEURVra+qK7vxHwPXAXsNuw5J0qaN9BpEkqXAM4FvNmY/K8lVSb6c5OmbWMfKJONJxicmJgZVqiQtOCMLiCS7AJ8H3lFV902ZfSWwb1UdCPwdcN5066mq06tqWVUtGxsbG1i9krTQjCQgkuxALxw+XVVfmDq/qu6rqvu78QuBHZLsOeQyJWlBG8W3mAKcAVxXVR+cps+Tun4kOYRenXcNr0pJ0ii+xfRs4LXANUlWd21/DOwDUFWnAa8A3pJkI/BT4NiqqhHUKkkL1tADoqouA7KZPqcCpw6nos2b6Z21k/2WnnwBa9//4oFta7JP/zY2tdzW1CNp4fJOaklSkwEhSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKaDAhJUpMBsRlb8l7qqe1LT75g2vdX98+b6TKb2/ZM529p30EsL2nbY0BIkpoMCElSkwEhSWoyICRJTQaEJKnJgJAkNY3qndQrktyQ5KYkJzfmPzbJZ7v530yydARlStKCNop3Um8H/D1wFHAAcFySA6Z0ewPwo6r6TeBDwF8Ot0pJ0iiOIA4Bbqqqm6vq58BngGOm9DkGOLsb/yfgBUk2+ZpSSdLsSlUNd4PJK4AVVfXGbvq1wKFV9ba+Pmu6Prd109/v+tzZWN9KYGU3uT9wwxaWtifwiPXPUwtpX2Fh7a/7On8Nan/3raqx1oztB7Cxoaqq04HTt3Y9ScaratkslLTNW0j7Cgtrf93X+WsU+zuKU0zrgL37ppd0bc0+SbYHngDcNZTqJEnAaALi28B+SZ6SZEfgWOD8KX3OB07oxl8BfLWGfS5Mkha4oZ9iqqqNSd4GXARsB5xZVdcmeR8wXlXnA2cAn0xyE3A3vRAZtK0+TTWHLKR9hYW1v+7r/DX0/R36RWpJ0tzgndSSpCYDQpLUZECw+Ud/zDVJzkyyobufZLJtjySrktzY/dy9a0+Sv+32/eokB4+u8kcvyd5Jvpbku0muTfL2rn3e7W+SnZJ8K8lV3b7+Sdf+lO6RNDd1j6jZsWuf84+sSbJdku8k+VI3PZ/3dW2Sa5KsTjLetY3093jBB8QMH/0x15wFrJjSdjJwSVXtB1zSTUNvv/frhpXAR4ZU42zZCJxUVQcAhwFv7f77zcf9/Rnw/Ko6EDgIWJHkMHqPovlQ92iaH9F7VA3Mj0fWvB24rm96Pu8rwPOq6qC++x1G+3tcVQt6AJ4FXNQ3/S7gXaOuaxb2aymwpm/6BmBxN74YuKEb/yhwXKvfXByALwJHzvf9BR4PXAkcSu/u2u279l/9PtP7puCzuvHtu34Zde2PYh+X0Puj+HzgS0Dm6752da8F9pzSNtLf4wV/BAHsBdzaN31b1zbfLKqq9d347cCibnze7H93WuGZwDeZp/vbnXJZDWwAVgHfB+6pqo1dl/79+dW+dvPvBZ441IK3zoeBdwIPdtNPZP7uK0ABFye5onuEEIz493jOP2pDj15VVZJ59f3mJLsAnwfeUVX39T/bcT7tb1X9EjgoyW7AucDTRlvRYCR5CbChqq5IcsSIyxmWw6tqXZJfB1Ylub5/5ih+jz2CmNmjP+aDO5IsBuh+buja5/z+J9mBXjh8uqq+0DXP2/0FqKp7gK/RO82yW/dIGnj4/szlR9Y8G3hZkrX0nvj8fOBvmJ/7CkBVret+bqAX/ocw4t9jA2Jmj/6YD/ofX3ICvXP1k+2v674VcRhwb98h7TYvvUOFM4DrquqDfbPm3f4mGeuOHEjyOHrXWq6jFxSv6LpN3dc5+ciaqnpXVS2pqqX0/k1+taqOZx7uK0CSnZPsOjkOLAfWMOrf41FfmNkWBuBo4Hv0zue+e9T1zML+nAOsB35B79zkG+idj70EuBH4F2CPrm/ofYvr+8A1wLJR1/8o9/VweudurwZWd8PR83F/gd8GvtPt6xrgPV37U4FvATcBnwMe
27Xv1E3f1M1/6qj3YQv3+wjgS/N5X7v9uqobrp38OzTq32MftSFJavIUkySpyYCQJDUZEJKkJgNCktRkQEiSmryTWtoKSX5J72uGk15eVWtHVI40q/yaq7QVktxfVbs8ymVC79/eg5vtLI2Qp5ikWZRklySXJLmye7b/MV370vTeOfIJeje57Z3kj5J8u3ue/5+MtnLpkTzFJG2dx3VPVwX4AfBK4Heq98DAPYHLk0w+umU/4ISqujzJ8m76EHp3xZ6f5LlVdemQ65emZUBIW+enVXXQ5ET34MA/T/Jceo+p3ouHHtF8S1Vd3o0v74bvdNO70AsMA0LbDANCml3HA2PAf6mqX3RPI92pm/dAX78Af1FVHx1yfdKMeQ1Cml1PoPceg18keR6w7zT9LgJ+t3uPBUn26t4DIG0zPIKQZtengX9Ocg0wDlzf6lRVFyf5LeAb3cuN7gdew0PP+5dGzq+5SpKaPMUkSWoyICRJTQaEJKnJgJAkNRkQkqQmA0KS1GRASJKa/j8dfxZ3CX2QIgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Likewise, inspect the distribution of Fare in the test set.\n",
    "fare_counts = test_data['Fare'].value_counts()\n",
    "print(\"看下data['Fare']里面的统计之后有哪些值：\", fare_counts.index.tolist())\n",
    "# Visualise it the same way: x axis is the fare value, y axis is how many rows have that fare.\n",
    "plt.bar(fare_counts.index, fare_counts.values)\n",
    "plt.title(\"Fare in test_data\")\n",
    "plt.xlabel(\"Fare\")\n",
    "plt.ylabel(\"num\")\n",
    "# Author's note: fares cluster on a limited set of price points, so filling the\n",
    "# missing Fare values with the mode seems reasonable.\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    7.75\n",
      "dtype: float64 <class 'pandas.core.series.Series'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fill missing Fare values in the test set with the mode (most frequent fare).\n",
    "fare_mode = test_data['Fare'].mode()  # mode() returns a Series, not a scalar\n",
    "print(fare_mode, type(fare_mode))\n",
    "# Index [0] extracts the scalar fill value from that Series. The result is assigned back\n",
    "# to the column because fillna(..., inplace=True) on test_data['Fare'] is chained\n",
    "# assignment: pandas deprecates it and it leaves test_data unchanged under Copy-on-Write.\n",
    "test_data['Fare'] = test_data['Fare'].fillna(fare_mode[0])\n",
    "test_data['Fare'].isnull().sum()  # expect 0: no missing Fare values remain in test_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 891 entries, 0 to 890\n",
      "Data columns (total 11 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   PassengerId  891 non-null    int64  \n",
      " 1   Survived     891 non-null    int64  \n",
      " 2   Pclass       891 non-null    int64  \n",
      " 3   Name         891 non-null    object \n",
      " 4   Sex          891 non-null    object \n",
      " 5   Age          891 non-null    float64\n",
      " 6   SibSp        891 non-null    int64  \n",
      " 7   Parch        891 non-null    int64  \n",
      " 8   Ticket       891 non-null    object \n",
      " 9   Fare         891 non-null    float64\n",
      " 10  Embarked     891 non-null    object \n",
      "dtypes: float64(2), int64(5), object(4)\n",
      "memory usage: 76.7+ KB\n"
     ]
    }
   ],
   "source": [
    "# Re-check the training and test sets for any remaining missing values.\n",
    "train_data.info()  # all columns are now fully populated"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 418 entries, 0 to 417\n",
      "Data columns (total 10 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   PassengerId  418 non-null    int64  \n",
      " 1   Pclass       418 non-null    int64  \n",
      " 2   Name         418 non-null    object \n",
      " 3   Sex          418 non-null    object \n",
      " 4   Age          418 non-null    float64\n",
      " 5   SibSp        418 non-null    int64  \n",
      " 6   Parch        418 non-null    int64  \n",
      " 7   Ticket       418 non-null    object \n",
      " 8   Fare         418 non-null    float64\n",
      " 9   Embarked     418 non-null    object \n",
      "dtypes: float64(2), int64(4), object(4)\n",
      "memory usage: 32.8+ KB\n"
     ]
    }
   ],
   "source": [
    "test_data.info()  # all columns are now fully populated"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature selection: choose the predictor columns and split off the target column.\n",
    "features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']\n",
    "train_labels = train_data['Survived']  # target column, present only in the training set\n",
    "train_features, test_features = train_data[features], test_data[features]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>886</th>\n",
       "      <td>2</td>\n",
       "      <td>male</td>\n",
       "      <td>27.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>13.0000</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>887</th>\n",
       "      <td>1</td>\n",
       "      <td>female</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>30.0000</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>888</th>\n",
       "      <td>3</td>\n",
       "      <td>female</td>\n",
       "      <td>29.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>23.4500</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>889</th>\n",
       "      <td>1</td>\n",
       "      <td>male</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>30.0000</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890</th>\n",
       "      <td>3</td>\n",
       "      <td>male</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7.7500</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>891 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Pclass     Sex   Age  SibSp  Parch     Fare Embarked\n",
       "0         3    male  22.0      1      0   7.2500        S\n",
       "1         1  female  38.0      1      0  71.2833        C\n",
       "2         3  female  26.0      0      0   7.9250        S\n",
       "3         1  female  35.0      1      0  53.1000        S\n",
       "4         3    male  35.0      0      0   8.0500        S\n",
       "..      ...     ...   ...    ...    ...      ...      ...\n",
       "886       2    male  27.0      0      0  13.0000        S\n",
       "887       1  female  19.0      0      0  30.0000        S\n",
       "888       3  female  29.0      1      2  23.4500        S\n",
       "889       1    male  26.0      0      0  30.0000        C\n",
       "890       3    male  32.0      0      0   7.7500        Q\n",
       "\n",
       "[891 rows x 7 columns]"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the selected training features (pandas abbreviates the middle rows).\n",
    "train_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data cleaning ends here; machine-learning modelling is left for later."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
